# sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E

logger = logging.getLogger("sqlglot")


def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from a flat [key1, value1, key2, value2, ...] list.

    A single star argument produces a StarMap instead of a VarMap.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Arguments alternate key/value; assumes an even number of args -- TODO confirm
    # callers guarantee this (an odd count would raise IndexError below).
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )


def parse_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a LIKE expression; a third argument becomes a wrapping ESCAPE clause."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression],
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callback producing ``expr_type(this, <bitwise expr>)``,
    optionally wrapped in an ESCAPE clause (via ``_parse_escape``)."""
    return lambda self, this: self._parse_escape(
        self.expression(expr_type, this=this, expression=self._parse_bitwise())
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET keyword tries for every Parser subclass."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        # Multi-word keys like "ISOLATION LEVEL ..." are matched word-by-word via a trie.
        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: Determines the amount of context to capture from a
            query string when displaying the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Name -> builder for SQL functions. Defaults come from exp.ALL_FUNCTIONS;
    # the explicit entries below special-case argument order or construction.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "LIKE": parse_like,
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "VAR_MAP": parse_var_map,
    }

    # Functions that may appear without parentheses, e.g. CURRENT_DATE.
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    # Types that can contain other types, e.g. ARRAY<INT>.
    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.STRUCT,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
    }

    # All tokens that may start a data type.
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.OBJECT,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.ENUM,
        *NESTED_TYPE_TOKENS,
    }

    # Predicates that may wrap a subquery, e.g. ANY (SELECT ...).
    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_KEYWORDS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    }

    # Objects creatable at the database level.
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.SCHEMA,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.DICTIONARY,
    }

    # Everything that can follow CREATE.
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Identifier tokens minus those that would be ambiguous after a table expression.
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list (callables).
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # Binary-operator tables below are grouped by precedence tier
    # (conjunction < equality < comparison < bitwise < term < factor).
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Lambda syntaxes: x -> expr (Lambda) and x => expr (Kwarg).
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Operators that bind directly to a column, e.g. casts and JSON extraction.
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Maps a target Expression type to the parse method used by parse_into().
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Dispatch on a statement's leading token.
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal / primary expression builders, keyed on the leading token.
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators, e.g. BETWEEN, IN, LIKE, IS.
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> parser; multi-word keys are matched as text sequences.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> parser (used inside CREATE TABLE column defs).
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE sub-command dispatch.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraints that may appear in a schema without a column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (non-CSV syntax).
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Query-modifier token -> (arg name, parser) pairs used to fill a query's args.
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Dialect hooks: empty here, populated by subclasses (tries built by _Parser).
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
798 799 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 800 801 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 802 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 803 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 804 805 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 806 807 STRICT_CAST = True 808 809 # A NULL arg in CONCAT yields NULL by default 810 CONCAT_NULL_OUTPUTS_STRING = False 811 812 PREFIXED_PIVOT_COLUMNS = False 813 IDENTIFY_PIVOT_STRINGS = False 814 815 LOG_BASE_FIRST = True 816 LOG_DEFAULTS_TO_LN = False 817 818 __slots__ = ( 819 "error_level", 820 "error_message_context", 821 "max_errors", 822 "sql", 823 "errors", 824 "_tokens", 825 "_index", 826 "_curr", 827 "_next", 828 "_prev", 829 "_prev_comments", 830 ) 831 832 # Autofilled 833 INDEX_OFFSET: int = 0 834 UNNEST_COLUMN_ONLY: bool = False 835 ALIAS_POST_TABLESAMPLE: bool = False 836 STRICT_STRING_CONCAT = False 837 NULL_ORDERING: str = "nulls_are_small" 838 SHOW_TRIE: t.Dict = {} 839 SET_TRIE: t.Dict = {} 840 FORMAT_MAPPING: t.Dict[str, str] = {} 841 FORMAT_TRIE: t.Dict = {} 842 TIME_MAPPING: t.Dict[str, str] = {} 843 TIME_TRIE: t.Dict = {} 844 845 def __init__( 846 self, 847 error_level: t.Optional[ErrorLevel] = None, 848 error_message_context: int = 100, 849 max_errors: int = 3, 850 ): 851 self.error_level = error_level or ErrorLevel.IMMEDIATE 852 self.error_message_context = error_message_context 853 self.max_errors = max_errors 854 self.reset() 855 856 def reset(self): 857 self.sql = "" 858 self.errors = [] 859 self._tokens = [] 860 self._index = 0 861 self._curr = None 862 self._next = None 863 self._prev = None 864 self._prev_comments = None 865 866 def parse( 867 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 868 ) -> t.List[t.Optional[exp.Expression]]: 869 """ 870 Parses a list of tokens and returns a list of syntax trees, one tree 871 per parsed SQL statement. 
872 873 Args: 874 raw_tokens: The list of tokens. 875 sql: The original SQL string, used to produce helpful debug messages. 876 877 Returns: 878 The list of the produced syntax trees. 879 """ 880 return self._parse( 881 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 882 ) 883 884 def parse_into( 885 self, 886 expression_types: exp.IntoType, 887 raw_tokens: t.List[Token], 888 sql: t.Optional[str] = None, 889 ) -> t.List[t.Optional[exp.Expression]]: 890 """ 891 Parses a list of tokens into a given Expression type. If a collection of Expression 892 types is given instead, this method will try to parse the token list into each one 893 of them, stopping at the first for which the parsing succeeds. 894 895 Args: 896 expression_types: The expression type(s) to try and parse the token list into. 897 raw_tokens: The list of tokens. 898 sql: The original SQL string, used to produce helpful debug messages. 899 900 Returns: 901 The target Expression. 902 """ 903 errors = [] 904 for expression_type in ensure_list(expression_types): 905 parser = self.EXPRESSION_PARSERS.get(expression_type) 906 if not parser: 907 raise TypeError(f"No parser registered for {expression_type}") 908 909 try: 910 return self._parse(parser, raw_tokens, sql) 911 except ParseError as e: 912 e.errors[0]["into_expression"] = expression_type 913 errors.append(e) 914 915 raise ParseError( 916 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 917 errors=merge_errors(errors), 918 ) from errors[-1] 919 920 def _parse( 921 self, 922 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 923 raw_tokens: t.List[Token], 924 sql: t.Optional[str] = None, 925 ) -> t.List[t.Optional[exp.Expression]]: 926 self.reset() 927 self.sql = sql or "" 928 929 total = len(raw_tokens) 930 chunks: t.List[t.List[Token]] = [[]] 931 932 for i, token in enumerate(raw_tokens): 933 if token.token_type == TokenType.SEMICOLON: 934 if i < total - 1: 935 chunks.append([]) 936 else: 937 
chunks[-1].append(token) 938 939 expressions = [] 940 941 for tokens in chunks: 942 self._index = -1 943 self._tokens = tokens 944 self._advance() 945 946 expressions.append(parse_method(self)) 947 948 if self._index < len(self._tokens): 949 self.raise_error("Invalid expression / Unexpected token") 950 951 self.check_errors() 952 953 return expressions 954 955 def check_errors(self) -> None: 956 """Logs or raises any found errors, depending on the chosen error level setting.""" 957 if self.error_level == ErrorLevel.WARN: 958 for error in self.errors: 959 logger.error(str(error)) 960 elif self.error_level == ErrorLevel.RAISE and self.errors: 961 raise ParseError( 962 concat_messages(self.errors, self.max_errors), 963 errors=merge_errors(self.errors), 964 ) 965 966 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 967 """ 968 Appends an error in the list of recorded errors or raises it, depending on the chosen 969 error level setting. 970 """ 971 token = token or self._curr or self._prev or Token.string("") 972 start = token.start 973 end = token.end + 1 974 start_context = self.sql[max(start - self.error_message_context, 0) : start] 975 highlight = self.sql[start:end] 976 end_context = self.sql[end : end + self.error_message_context] 977 978 error = ParseError.new( 979 f"{message}. Line {token.line}, Col: {token.col}.\n" 980 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 981 description=message, 982 line=token.line, 983 col=token.col, 984 start_context=start_context, 985 highlight=highlight, 986 end_context=end_context, 987 ) 988 989 if self.error_level == ErrorLevel.IMMEDIATE: 990 raise error 991 992 self.errors.append(error) 993 994 def expression( 995 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 996 ) -> E: 997 """ 998 Creates a new, validated Expression. 999 1000 Args: 1001 exp_class: The expression class to instantiate. 
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Attach explicit comments if given, otherwise move any pending tokenizer comments over.
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers comments collected from the previous token onto `expression`, consuming them.
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Returns the SQL substring spanned by the given tokens (end token inclusive).
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        # Moves the token cursor forward and refreshes the _curr/_next/_prev views.
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        # Moves the token cursor back to `index`, implemented as a negative advance.
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the previous token's text plus the remaining string as an opaque command.
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT [IF EXISTS] ON <kind> <target> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown target kind: degrade gracefully to a raw command.
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause, including per-expression actions."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL expression, optionally followed by DELETE / RECOMPRESS / TO DISK / TO VOLUME.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates
= None
        # SET aggregations are only valid after a GROUP BY in the TTL clause.
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement, dispatching on the current token type."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # No statement keyword matched: try a bare expression, then fall back to a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        """Parses DROP [TEMPORARY] [MATERIALIZED] <kind> ... with optional trailing modifiers."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: degrade gracefully to a raw command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parses CREATE [OR REPLACE] [UNIQUE] <kind> ... for tables, views, UDFs, indexes, etc."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        # CREATE TABLE FUNCTION: skip the TABLE token so FUNCTION is the creatable kind.
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates properties, which may appear in several locations of the statement.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            begin = self._match(TokenType.BEGIN)
            return_ = self._match_text_seq("RETURN")
            expression = self._parse_statement()

            if return_:
                expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            this = self._parse_index(index=self._parse_id_var())
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(schema=True)

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            expression = self._parse_ddl_select()

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())

                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True

            # Snowflake-style CLONE, optionally with a time-travel point: AT/BEFORE (<kind> => expr).
            if self._match_text_seq("CLONE"):
                clone = self._parse_table(schema=True)
                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
                clone_kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.CLONE_KINDS)
                    and self._prev.text.upper()
                )
                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
                self._match(TokenType.R_PAREN)
                clone = self.expression(
                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
                )

        return self.expression(
            exp.Create,
            this=this,
            kind=create_token.text,
            replace=replace,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
        )

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Collect optional modifier keywords that may precede the property name.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default":
self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifier flags that actually matched.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The chosen parser doesn't accept these modifiers: surface a parse error.
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parses a single table/DDL property, returning None if nothing matches."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
            return self._parse_character_set(default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        # Generic `key = value` property assignment (peeked without advancing).
        assignment = self._match_pair(
            TokenType.VAR, TokenType.EQ, advance=False
        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)

        if assignment:
            key = self._parse_var_or_string()
            self._match(TokenType.EQ)
            return self.expression(exp.Property, this=key, value=self._parse_column())

        return None

    def _parse_stored(self) -> exp.FileFormatProperty:
        """Parses STORED AS, including Hive's INPUTFORMAT/OUTPUTFORMAT form."""
        self._match(TokenType.ALIAS)

        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=self.expression(
                exp.InputOutputFormat, input_format=input_format, output_format=output_format
            )
            if input_format or output_format
            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
        )

    def _parse_property_assignment(self, exp_class: t.Type[E]) -> E:
        # Parses an optional `=` / `AS`, then the property's value field.
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)
        return self.expression(exp_class, this=self._parse_field())

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parses consecutive properties; `before` selects the Teradata pre-name form."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()

            if not prop:
                break
            # A single parse may yield one property or a list of them.
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # VOLATILE is ambiguous: disambiguate via the token two positions back
        # (e.g. CREATE VOLATILE TABLE vs. a function's VOLATILE stability marker).
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parses the clause following WITH: a parenthesized list or a Teradata keyword form."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return
self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        """Parses DEFINER = user@host; returns None when either part is missing."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        """Parses CHECKSUM = ON | OFF | DEFAULT."""
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parses CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS: undo the COPY match so other parsers can try.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        """Parses BLOCKCOMPRESSION = ALWAYS | MANUAL | NEVER | DEFAULT [AUTOTEMP(...)]."""
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        """Parses [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parses a LOCKING clause: kind, optional target, FOR/IN, lock type and OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if
self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        """Parses the Teradata WITH [NO] DATA [AND [NO] STATISTICS] tail."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parses LIKE <table> [INCLUDING/EXCLUDING <option> ...]."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parses a UDF RETURNS clause: a scalar type, TABLE<...>, or TABLE (schema)."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # TABLE<col type, ...> angle-bracket form.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parses INSERT, including INSERT OVERWRITE DIRECTORY and OR <alternative> forms."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parses ON CONFLICT (Postgres) or ON DUPLICATE KEY (MySQL) clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key expressions.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parses Hive ROW FORMAT SERDE or ROW FORMAT DELIMITED clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional and order-sensitive.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parses Hive LOAD DATA [LOCAL] INPATH ... INTO TABLE ...; otherwise a raw command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and
self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parses UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] [RETURNING ...] [LIMIT ...]."""
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parses Spark-style CACHE [LAZY] TABLE <t> [OPTIONS ('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parses a SELECT (or CTE-prefixed statement, parenthesized query, or VALUES)."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if
all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                # DuckDB-style FROM-first query: implicit SELECT *.
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parses a WITH [RECURSIVE] clause containing one or more CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH between CTEs.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parses [AS] <alias> [(col, ...)]; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # No column list inside the parens: rewind so the paren can be re-parsed.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attaches joins, laterals and other trailing modifiers to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            # Greedily consume all joins and laterals before the keyword-driven modifiers.
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS,
advance=False): 2094 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 2095 key, expression = parser(self) 2096 2097 if expression: 2098 this.set(key, expression) 2099 if key == "limit": 2100 offset = expression.args.pop("offset", None) 2101 if offset: 2102 this.set("offset", exp.Offset(expression=offset)) 2103 continue 2104 break 2105 return this 2106 2107 def _parse_hint(self) -> t.Optional[exp.Hint]: 2108 if self._match(TokenType.HINT): 2109 hints = [] 2110 for hint in iter(lambda: self._parse_csv(self._parse_function), []): 2111 hints.extend(hint) 2112 2113 if not self._match_pair(TokenType.STAR, TokenType.SLASH): 2114 self.raise_error("Expected */ after HINT") 2115 2116 return self.expression(exp.Hint, expressions=hints) 2117 2118 return None 2119 2120 def _parse_into(self) -> t.Optional[exp.Into]: 2121 if not self._match(TokenType.INTO): 2122 return None 2123 2124 temp = self._match(TokenType.TEMPORARY) 2125 unlogged = self._match_text_seq("UNLOGGED") 2126 self._match(TokenType.TABLE) 2127 2128 return self.expression( 2129 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 2130 ) 2131 2132 def _parse_from( 2133 self, joins: bool = False, skip_from_token: bool = False 2134 ) -> t.Optional[exp.From]: 2135 if not skip_from_token and not self._match(TokenType.FROM): 2136 return None 2137 2138 return self.expression( 2139 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 2140 ) 2141 2142 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 2143 if not self._match(TokenType.MATCH_RECOGNIZE): 2144 return None 2145 2146 self._match_l_paren() 2147 2148 partition = self._parse_partition_by() 2149 order = self._parse_order() 2150 measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None 2151 2152 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2153 rows = exp.var("ONE ROW PER MATCH") 2154 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2155 text = 
"ALL ROWS PER MATCH" 2156 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2157 text += f" SHOW EMPTY MATCHES" 2158 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2159 text += f" OMIT EMPTY MATCHES" 2160 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2161 text += f" WITH UNMATCHED ROWS" 2162 rows = exp.var(text) 2163 else: 2164 rows = None 2165 2166 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2167 text = "AFTER MATCH SKIP" 2168 if self._match_text_seq("PAST", "LAST", "ROW"): 2169 text += f" PAST LAST ROW" 2170 elif self._match_text_seq("TO", "NEXT", "ROW"): 2171 text += f" TO NEXT ROW" 2172 elif self._match_text_seq("TO", "FIRST"): 2173 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2174 elif self._match_text_seq("TO", "LAST"): 2175 text += f" TO LAST {self._advance_any().text}" # type: ignore 2176 after = exp.var(text) 2177 else: 2178 after = None 2179 2180 if self._match_text_seq("PATTERN"): 2181 self._match_l_paren() 2182 2183 if not self._curr: 2184 self.raise_error("Expecting )", self._curr) 2185 2186 paren = 1 2187 start = self._curr 2188 2189 while self._curr and paren > 0: 2190 if self._curr.token_type == TokenType.L_PAREN: 2191 paren += 1 2192 if self._curr.token_type == TokenType.R_PAREN: 2193 paren -= 1 2194 2195 end = self._prev 2196 self._advance() 2197 2198 if paren > 0: 2199 self.raise_error("Expecting )", self._curr) 2200 2201 pattern = exp.var(self._find_sql(start, end)) 2202 else: 2203 pattern = None 2204 2205 define = ( 2206 self._parse_csv( 2207 lambda: self.expression( 2208 exp.Alias, 2209 alias=self._parse_id_var(any_token=True), 2210 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2211 ) 2212 ) 2213 if self._match_text_seq("DEFINE") 2214 else None 2215 ) 2216 2217 self._match_r_paren() 2218 2219 return self.expression( 2220 exp.MatchRecognize, 2221 partition_by=partition, 2222 order=order, 2223 measures=measures, 2224 rows=rows, 2225 after=after, 2226 pattern=pattern, 2227 
define=define, 2228 alias=self._parse_table_alias(), 2229 ) 2230 2231 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 2232 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2233 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2234 2235 if outer_apply or cross_apply: 2236 this = self._parse_select(table=True) 2237 view = None 2238 outer = not cross_apply 2239 elif self._match(TokenType.LATERAL): 2240 this = self._parse_select(table=True) 2241 view = self._match(TokenType.VIEW) 2242 outer = self._match(TokenType.OUTER) 2243 else: 2244 return None 2245 2246 if not this: 2247 this = ( 2248 self._parse_unnest() 2249 or self._parse_function() 2250 or self._parse_id_var(any_token=False) 2251 ) 2252 2253 while self._match(TokenType.DOT): 2254 this = exp.Dot( 2255 this=this, 2256 expression=self._parse_function() or self._parse_id_var(any_token=False), 2257 ) 2258 2259 if view: 2260 table = self._parse_id_var(any_token=False) 2261 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2262 table_alias: t.Optional[exp.TableAlias] = self.expression( 2263 exp.TableAlias, this=table, columns=columns 2264 ) 2265 elif isinstance(this, exp.Subquery) and this.alias: 2266 # Ensures parity between the Subquery's and the Lateral's "alias" args 2267 table_alias = this.args["alias"].copy() 2268 else: 2269 table_alias = self._parse_table_alias() 2270 2271 return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias) 2272 2273 def _parse_join_parts( 2274 self, 2275 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2276 return ( 2277 self._match_set(self.JOIN_METHODS) and self._prev, 2278 self._match_set(self.JOIN_SIDES) and self._prev, 2279 self._match_set(self.JOIN_KINDS) and self._prev, 2280 ) 2281 2282 def _parse_join( 2283 self, skip_join_token: bool = False, parse_bracket: bool = False 2284 ) -> t.Optional[exp.Join]: 2285 if self._match(TokenType.COMMA): 2286 return 
self.expression(exp.Join, this=self._parse_table()) 2287 2288 index = self._index 2289 method, side, kind = self._parse_join_parts() 2290 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2291 join = self._match(TokenType.JOIN) 2292 2293 if not skip_join_token and not join: 2294 self._retreat(index) 2295 kind = None 2296 method = None 2297 side = None 2298 2299 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2300 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2301 2302 if not skip_join_token and not join and not outer_apply and not cross_apply: 2303 return None 2304 2305 if outer_apply: 2306 side = Token(TokenType.LEFT, "LEFT") 2307 2308 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 2309 2310 if method: 2311 kwargs["method"] = method.text 2312 if side: 2313 kwargs["side"] = side.text 2314 if kind: 2315 kwargs["kind"] = kind.text 2316 if hint: 2317 kwargs["hint"] = hint 2318 2319 if self._match(TokenType.ON): 2320 kwargs["on"] = self._parse_conjunction() 2321 elif self._match(TokenType.USING): 2322 kwargs["using"] = self._parse_wrapped_id_vars() 2323 elif not (kind and kind.token_type == TokenType.CROSS): 2324 index = self._index 2325 joins = self._parse_joins() 2326 2327 if joins and self._match(TokenType.ON): 2328 kwargs["on"] = self._parse_conjunction() 2329 elif joins and self._match(TokenType.USING): 2330 kwargs["using"] = self._parse_wrapped_id_vars() 2331 else: 2332 joins = None 2333 self._retreat(index) 2334 2335 kwargs["this"].set("joins", joins) 2336 2337 return self.expression(exp.Join, **kwargs) 2338 2339 def _parse_index( 2340 self, 2341 index: t.Optional[exp.Expression] = None, 2342 ) -> t.Optional[exp.Index]: 2343 if index: 2344 unique = None 2345 primary = None 2346 amp = None 2347 2348 self._match(TokenType.ON) 2349 self._match(TokenType.TABLE) # hive 2350 table = self._parse_table_parts(schema=True) 2351 else: 2352 unique = 
self._match(TokenType.UNIQUE) 2353 primary = self._match_text_seq("PRIMARY") 2354 amp = self._match_text_seq("AMP") 2355 2356 if not self._match(TokenType.INDEX): 2357 return None 2358 2359 index = self._parse_id_var() 2360 table = None 2361 2362 using = self._parse_field() if self._match(TokenType.USING) else None 2363 2364 if self._match(TokenType.L_PAREN, advance=False): 2365 columns = self._parse_wrapped_csv(self._parse_ordered) 2366 else: 2367 columns = None 2368 2369 return self.expression( 2370 exp.Index, 2371 this=index, 2372 table=table, 2373 using=using, 2374 columns=columns, 2375 unique=unique, 2376 primary=primary, 2377 amp=amp, 2378 partition_by=self._parse_partition_by(), 2379 ) 2380 2381 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 2382 hints: t.List[exp.Expression] = [] 2383 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2384 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 2385 hints.append( 2386 self.expression( 2387 exp.WithTableHint, 2388 expressions=self._parse_csv( 2389 lambda: self._parse_function() or self._parse_var(any_token=True) 2390 ), 2391 ) 2392 ) 2393 self._match_r_paren() 2394 else: 2395 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 2396 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 2397 hint = exp.IndexTableHint(this=self._prev.text.upper()) 2398 2399 self._match_texts({"INDEX", "KEY"}) 2400 if self._match(TokenType.FOR): 2401 hint.set("target", self._advance_any() and self._prev.text.upper()) 2402 2403 hint.set("expressions", self._parse_wrapped_id_vars()) 2404 hints.append(hint) 2405 2406 return hints or None 2407 2408 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2409 return ( 2410 (not schema and self._parse_function(optional_parens=False)) 2411 or self._parse_id_var(any_token=False) 2412 or self._parse_string_as_identifier() 2413 or self._parse_placeholder() 2414 ) 2415 2416 def 
_parse_table_parts(self, schema: bool = False) -> exp.Table: 2417 catalog = None 2418 db = None 2419 table = self._parse_table_part(schema=schema) 2420 2421 while self._match(TokenType.DOT): 2422 if catalog: 2423 # This allows nesting the table in arbitrarily many dot expressions if needed 2424 table = self.expression( 2425 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2426 ) 2427 else: 2428 catalog = db 2429 db = table 2430 table = self._parse_table_part(schema=schema) 2431 2432 if not table: 2433 self.raise_error(f"Expected table name but got {self._curr}") 2434 2435 return self.expression( 2436 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2437 ) 2438 2439 def _parse_table( 2440 self, 2441 schema: bool = False, 2442 joins: bool = False, 2443 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 2444 parse_bracket: bool = False, 2445 ) -> t.Optional[exp.Expression]: 2446 lateral = self._parse_lateral() 2447 if lateral: 2448 return lateral 2449 2450 unnest = self._parse_unnest() 2451 if unnest: 2452 return unnest 2453 2454 values = self._parse_derived_table_values() 2455 if values: 2456 return values 2457 2458 subquery = self._parse_select(table=True) 2459 if subquery: 2460 if not subquery.args.get("pivots"): 2461 subquery.set("pivots", self._parse_pivots()) 2462 return subquery 2463 2464 bracket = parse_bracket and self._parse_bracket(None) 2465 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2466 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2467 2468 if schema: 2469 return self._parse_schema(this=this) 2470 2471 if self.ALIAS_POST_TABLESAMPLE: 2472 table_sample = self._parse_table_sample() 2473 2474 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2475 if alias: 2476 this.set("alias", alias) 2477 2478 if not this.args.get("pivots"): 2479 this.set("pivots", self._parse_pivots()) 2480 2481 this.set("hints", 
self._parse_table_hints()) 2482 2483 if not self.ALIAS_POST_TABLESAMPLE: 2484 table_sample = self._parse_table_sample() 2485 2486 if table_sample: 2487 table_sample.set("this", this) 2488 this = table_sample 2489 2490 if joins: 2491 for join in iter(self._parse_join, None): 2492 this.append("joins", join) 2493 2494 return this 2495 2496 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2497 if not self._match(TokenType.UNNEST): 2498 return None 2499 2500 expressions = self._parse_wrapped_csv(self._parse_type) 2501 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2502 2503 alias = self._parse_table_alias() if with_alias else None 2504 2505 if alias and self.UNNEST_COLUMN_ONLY: 2506 if alias.args.get("columns"): 2507 self.raise_error("Unexpected extra column alias in unnest.") 2508 2509 alias.set("columns", [alias.this]) 2510 alias.set("this", None) 2511 2512 offset = None 2513 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2514 self._match(TokenType.ALIAS) 2515 offset = self._parse_id_var() or exp.to_identifier("offset") 2516 2517 return self.expression( 2518 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2519 ) 2520 2521 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2522 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2523 if not is_derived and not self._match(TokenType.VALUES): 2524 return None 2525 2526 expressions = self._parse_csv(self._parse_value) 2527 alias = self._parse_table_alias() 2528 2529 if is_derived: 2530 self._match_r_paren() 2531 2532 return self.expression( 2533 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2534 ) 2535 2536 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2537 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2538 as_modifier and self._match_text_seq("USING", "SAMPLE") 2539 ): 2540 return None 2541 2542 bucket_numerator = None 
2543 bucket_denominator = None 2544 bucket_field = None 2545 percent = None 2546 rows = None 2547 size = None 2548 seed = None 2549 2550 kind = ( 2551 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2552 ) 2553 method = self._parse_var(tokens=(TokenType.ROW,)) 2554 2555 self._match(TokenType.L_PAREN) 2556 2557 num = self._parse_number() 2558 2559 if self._match_text_seq("BUCKET"): 2560 bucket_numerator = self._parse_number() 2561 self._match_text_seq("OUT", "OF") 2562 bucket_denominator = bucket_denominator = self._parse_number() 2563 self._match(TokenType.ON) 2564 bucket_field = self._parse_field() 2565 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2566 percent = num 2567 elif self._match(TokenType.ROWS): 2568 rows = num 2569 else: 2570 size = num 2571 2572 self._match(TokenType.R_PAREN) 2573 2574 if self._match(TokenType.L_PAREN): 2575 method = self._parse_var() 2576 seed = self._match(TokenType.COMMA) and self._parse_number() 2577 self._match_r_paren() 2578 elif self._match_texts(("SEED", "REPEATABLE")): 2579 seed = self._parse_wrapped(self._parse_number) 2580 2581 return self.expression( 2582 exp.TableSample, 2583 method=method, 2584 bucket_numerator=bucket_numerator, 2585 bucket_denominator=bucket_denominator, 2586 bucket_field=bucket_field, 2587 percent=percent, 2588 rows=rows, 2589 size=size, 2590 seed=seed, 2591 kind=kind, 2592 ) 2593 2594 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2595 return list(iter(self._parse_pivot, None)) or None 2596 2597 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2598 return list(iter(self._parse_join, None)) or None 2599 2600 # https://duckdb.org/docs/sql/statements/pivot 2601 def _parse_simplified_pivot(self) -> exp.Pivot: 2602 def _parse_on() -> t.Optional[exp.Expression]: 2603 this = self._parse_bitwise() 2604 return self._parse_in(this) if self._match(TokenType.IN) else this 2605 2606 this = self._parse_table() 2607 expressions = 
self._match(TokenType.ON) and self._parse_csv(_parse_on) 2608 using = self._match(TokenType.USING) and self._parse_csv( 2609 lambda: self._parse_alias(self._parse_function()) 2610 ) 2611 group = self._parse_group() 2612 return self.expression( 2613 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2614 ) 2615 2616 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2617 index = self._index 2618 2619 if self._match(TokenType.PIVOT): 2620 unpivot = False 2621 elif self._match(TokenType.UNPIVOT): 2622 unpivot = True 2623 else: 2624 return None 2625 2626 expressions = [] 2627 field = None 2628 2629 if not self._match(TokenType.L_PAREN): 2630 self._retreat(index) 2631 return None 2632 2633 if unpivot: 2634 expressions = self._parse_csv(self._parse_column) 2635 else: 2636 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2637 2638 if not expressions: 2639 self.raise_error("Failed to parse PIVOT's aggregation list") 2640 2641 if not self._match(TokenType.FOR): 2642 self.raise_error("Expecting FOR") 2643 2644 value = self._parse_column() 2645 2646 if not self._match(TokenType.IN): 2647 self.raise_error("Expecting IN") 2648 2649 field = self._parse_in(value, alias=True) 2650 2651 self._match_r_paren() 2652 2653 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2654 2655 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2656 pivot.set("alias", self._parse_table_alias()) 2657 2658 if not unpivot: 2659 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2660 2661 columns: t.List[exp.Expression] = [] 2662 for fld in pivot.args["field"].expressions: 2663 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2664 for name in names: 2665 if self.PREFIXED_PIVOT_COLUMNS: 2666 name = f"{name}_{field_name}" if name else field_name 2667 else: 2668 name = f"{field_name}_{name}" if name else field_name 2669 2670 
columns.append(exp.to_identifier(name)) 2671 2672 pivot.set("columns", columns) 2673 2674 return pivot 2675 2676 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 2677 return [agg.alias for agg in aggregations] 2678 2679 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 2680 if not skip_where_token and not self._match(TokenType.WHERE): 2681 return None 2682 2683 return self.expression( 2684 exp.Where, comments=self._prev_comments, this=self._parse_conjunction() 2685 ) 2686 2687 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 2688 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 2689 return None 2690 2691 elements = defaultdict(list) 2692 2693 if self._match(TokenType.ALL): 2694 return self.expression(exp.Group, all=True) 2695 2696 while True: 2697 expressions = self._parse_csv(self._parse_conjunction) 2698 if expressions: 2699 elements["expressions"].extend(expressions) 2700 2701 grouping_sets = self._parse_grouping_sets() 2702 if grouping_sets: 2703 elements["grouping_sets"].extend(grouping_sets) 2704 2705 rollup = None 2706 cube = None 2707 totals = None 2708 2709 with_ = self._match(TokenType.WITH) 2710 if self._match(TokenType.ROLLUP): 2711 rollup = with_ or self._parse_wrapped_csv(self._parse_column) 2712 elements["rollup"].extend(ensure_list(rollup)) 2713 2714 if self._match(TokenType.CUBE): 2715 cube = with_ or self._parse_wrapped_csv(self._parse_column) 2716 elements["cube"].extend(ensure_list(cube)) 2717 2718 if self._match_text_seq("TOTALS"): 2719 totals = True 2720 elements["totals"] = True # type: ignore 2721 2722 if not (grouping_sets or rollup or cube or totals): 2723 break 2724 2725 return self.expression(exp.Group, **elements) # type: ignore 2726 2727 def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 2728 if not self._match(TokenType.GROUPING_SETS): 2729 return None 2730 2731 return 
self._parse_wrapped_csv(self._parse_grouping_set) 2732 2733 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 2734 if self._match(TokenType.L_PAREN): 2735 grouping_set = self._parse_csv(self._parse_column) 2736 self._match_r_paren() 2737 return self.expression(exp.Tuple, expressions=grouping_set) 2738 2739 return self._parse_column() 2740 2741 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 2742 if not skip_having_token and not self._match(TokenType.HAVING): 2743 return None 2744 return self.expression(exp.Having, this=self._parse_conjunction()) 2745 2746 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 2747 if not self._match(TokenType.QUALIFY): 2748 return None 2749 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2750 2751 def _parse_order( 2752 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2753 ) -> t.Optional[exp.Expression]: 2754 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2755 return this 2756 2757 return self.expression( 2758 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2759 ) 2760 2761 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 2762 if not self._match(token): 2763 return None 2764 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2765 2766 def _parse_ordered(self) -> exp.Ordered: 2767 this = self._parse_conjunction() 2768 self._match(TokenType.ASC) 2769 2770 is_desc = self._match(TokenType.DESC) 2771 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 2772 is_nulls_last = self._match_text_seq("NULLS", "LAST") 2773 desc = is_desc or False 2774 asc = not desc 2775 nulls_first = is_nulls_first or False 2776 explicitly_null_ordered = is_nulls_first or is_nulls_last 2777 2778 if ( 2779 not explicitly_null_ordered 2780 and ( 2781 (asc and self.NULL_ORDERING == "nulls_are_small") 2782 or (desc and self.NULL_ORDERING != "nulls_are_small") 2783 
) 2784 and self.NULL_ORDERING != "nulls_are_last" 2785 ): 2786 nulls_first = True 2787 2788 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2789 2790 def _parse_limit( 2791 self, this: t.Optional[exp.Expression] = None, top: bool = False 2792 ) -> t.Optional[exp.Expression]: 2793 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2794 comments = self._prev_comments 2795 if top: 2796 limit_paren = self._match(TokenType.L_PAREN) 2797 expression = self._parse_number() 2798 2799 if limit_paren: 2800 self._match_r_paren() 2801 else: 2802 expression = self._parse_term() 2803 2804 if self._match(TokenType.COMMA): 2805 offset = expression 2806 expression = self._parse_term() 2807 else: 2808 offset = None 2809 2810 limit_exp = self.expression( 2811 exp.Limit, this=this, expression=expression, offset=offset, comments=comments 2812 ) 2813 2814 return limit_exp 2815 2816 if self._match(TokenType.FETCH): 2817 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2818 direction = self._prev.text if direction else "FIRST" 2819 2820 count = self._parse_number() 2821 percent = self._match(TokenType.PERCENT) 2822 2823 self._match_set((TokenType.ROW, TokenType.ROWS)) 2824 2825 only = self._match_text_seq("ONLY") 2826 with_ties = self._match_text_seq("WITH", "TIES") 2827 2828 if only and with_ties: 2829 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2830 2831 return self.expression( 2832 exp.Fetch, 2833 direction=direction, 2834 count=count, 2835 percent=percent, 2836 with_ties=with_ties, 2837 ) 2838 2839 return this 2840 2841 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2842 if not self._match(TokenType.OFFSET): 2843 return this 2844 2845 count = self._parse_term() 2846 self._match_set((TokenType.ROW, TokenType.ROWS)) 2847 return self.expression(exp.Offset, this=this, expression=count) 2848 2849 def _parse_locks(self) -> t.List[exp.Lock]: 2850 locks = [] 
2851 while True: 2852 if self._match_text_seq("FOR", "UPDATE"): 2853 update = True 2854 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 2855 "LOCK", "IN", "SHARE", "MODE" 2856 ): 2857 update = False 2858 else: 2859 break 2860 2861 expressions = None 2862 if self._match_text_seq("OF"): 2863 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 2864 2865 wait: t.Optional[bool | exp.Expression] = None 2866 if self._match_text_seq("NOWAIT"): 2867 wait = True 2868 elif self._match_text_seq("WAIT"): 2869 wait = self._parse_primary() 2870 elif self._match_text_seq("SKIP", "LOCKED"): 2871 wait = False 2872 2873 locks.append( 2874 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 2875 ) 2876 2877 return locks 2878 2879 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2880 if not self._match_set(self.SET_OPERATIONS): 2881 return this 2882 2883 token_type = self._prev.token_type 2884 2885 if token_type == TokenType.UNION: 2886 expression = exp.Union 2887 elif token_type == TokenType.EXCEPT: 2888 expression = exp.Except 2889 else: 2890 expression = exp.Intersect 2891 2892 return self.expression( 2893 expression, 2894 this=this, 2895 distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL), 2896 expression=self._parse_set_operations(self._parse_select(nested=True)), 2897 ) 2898 2899 def _parse_expression(self) -> t.Optional[exp.Expression]: 2900 return self._parse_alias(self._parse_conjunction()) 2901 2902 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 2903 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 2904 2905 def _parse_equality(self) -> t.Optional[exp.Expression]: 2906 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 2907 2908 def _parse_comparison(self) -> t.Optional[exp.Expression]: 2909 return self._parse_tokens(self._parse_range, self.COMPARISON) 2910 2911 def _parse_range(self) -> 
t.Optional[exp.Expression]: 2912 this = self._parse_bitwise() 2913 negate = self._match(TokenType.NOT) 2914 2915 if self._match_set(self.RANGE_PARSERS): 2916 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 2917 if not expression: 2918 return this 2919 2920 this = expression 2921 elif self._match(TokenType.ISNULL): 2922 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2923 2924 # Postgres supports ISNULL and NOTNULL for conditions. 2925 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2926 if self._match(TokenType.NOTNULL): 2927 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2928 this = self.expression(exp.Not, this=this) 2929 2930 if negate: 2931 this = self.expression(exp.Not, this=this) 2932 2933 if self._match(TokenType.IS): 2934 this = self._parse_is(this) 2935 2936 return this 2937 2938 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2939 index = self._index - 1 2940 negate = self._match(TokenType.NOT) 2941 2942 if self._match_text_seq("DISTINCT", "FROM"): 2943 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2944 return self.expression(klass, this=this, expression=self._parse_expression()) 2945 2946 expression = self._parse_null() or self._parse_boolean() 2947 if not expression: 2948 self._retreat(index) 2949 return None 2950 2951 this = self.expression(exp.Is, this=this, expression=expression) 2952 return self.expression(exp.Not, this=this) if negate else this 2953 2954 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 2955 unnest = self._parse_unnest(with_alias=False) 2956 if unnest: 2957 this = self.expression(exp.In, this=this, unnest=unnest) 2958 elif self._match(TokenType.L_PAREN): 2959 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 2960 2961 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2962 this = self.expression(exp.In, this=this, 
query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            # Unparenthesized form, e.g. Teradata's `x IN field`.
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse `<this> BETWEEN low AND high`; the BETWEEN token was already consumed."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows, otherwise return it unchanged."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL expression; returns None if the next token isn't INTERVAL."""
        if not self._match(TokenType.INTERVAL):
            return None

        # A string literal gets parsed as a primary; anything else (e.g. a number
        # or an expression) is parsed as a term.
        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    # Split e.g. '5 day' into value '5' and unit `day`.
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse a chain of bitwise operators (including << and >>) over terms."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM) over factors."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR) over unary expressions."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator, or fall through to a typed/AT TIME ZONE expression."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a cast-like `<type> <literal>` construct, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # e.g. DATE '2020-01-01' — dialect-specific literal parsers first.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by something else: treat it as a column.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse a type size argument, e.g. the `10` in VARCHAR(10), with optional keyword."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, returning None (and rewinding) if no type is present.

        check_func guards against misreading a function call as a type; schema is
        forwarded to nested type parses (struct / container element types).
        """
        index = self._index

        # Teradata system UDT prefix, e.g. SYSUDTLIB.<type>.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all — rewind fully.
                self._retreat(index)
                return None

            maybe_func = True

        # Postgres-style array suffix, e.g. INT[] or INT[][].
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this wasn't a type (likely an index/bracket expression).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket container syntax, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # A bare `<name>(...)` not followed by a string is a function call,
                # not a type — rewind everything.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field, e.g. `name: INT` or `name INT`."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if that clause follows, else return it unchanged."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a column reference, including any trailing column operators."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        return self._parse_column_ops(this)

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (::cast, dots, brackets, etc.) to `this`."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: <expr>::<type>.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers: what looked like a column is actually a table/db prefix.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literal, adjacent-string concat, or parenthesized expr."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # SQL implicit concatenation of adjacent string literals: 'a' 'b'.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal, e.g. `.25`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: if True, always build exp.Anonymous instead of a known function node.
            optional_parens: allow paren-less functions such as CURRENT_DATE.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'abc'); falls back to an identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda (x -> ...), or fall back to DISTINCT / select / expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda — rewind and parse as a regular argument expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs / constraints) attached to `this`."""
        index = self._index

        if not self.errors:
            # Speculatively try a nested SELECT first (e.g. CREATE TABLE t AS (SELECT ...)).
            try:
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, and constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, wrapped or single-expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY (...) with sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # GENERATED ALWAYS AS (<expression>) — a computed column.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse the constraint following NOT: NULL or CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly named) table constraint; unnamed ones are delegated."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint using the given (or default) constraint keywords."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint, optionally with a column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON DELETE/UPDATE actions, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
elif self._match_text_seq("MATCH", "FULL"): 3630 options.append("MATCH FULL") 3631 else: 3632 break 3633 3634 return options 3635 3636 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 3637 if match and not self._match(TokenType.REFERENCES): 3638 return None 3639 3640 expressions = None 3641 this = self._parse_table(schema=True) 3642 options = self._parse_key_constraint_options() 3643 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 3644 3645 def _parse_foreign_key(self) -> exp.ForeignKey: 3646 expressions = self._parse_wrapped_id_vars() 3647 reference = self._parse_references() 3648 options = {} 3649 3650 while self._match(TokenType.ON): 3651 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 3652 self.raise_error("Expected DELETE or UPDATE") 3653 3654 kind = self._prev.text.lower() 3655 3656 if self._match_text_seq("NO", "ACTION"): 3657 action = "NO ACTION" 3658 elif self._match(TokenType.SET): 3659 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 3660 action = "SET " + self._prev.text.upper() 3661 else: 3662 self._advance() 3663 action = self._prev.text.upper() 3664 3665 options[kind] = action 3666 3667 return self.expression( 3668 exp.ForeignKey, expressions=expressions, reference=reference, **options # type: ignore 3669 ) 3670 3671 def _parse_primary_key( 3672 self, wrapped_optional: bool = False, in_props: bool = False 3673 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 3674 desc = ( 3675 self._match_set((TokenType.ASC, TokenType.DESC)) 3676 and self._prev.token_type == TokenType.DESC 3677 ) 3678 3679 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 3680 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 3681 3682 expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional) 3683 options = self._parse_key_constraint_options() 3684 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 
3685 3686 def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3687 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 3688 return this 3689 3690 bracket_kind = self._prev.token_type 3691 3692 if self._match(TokenType.COLON): 3693 expressions: t.List[t.Optional[exp.Expression]] = [ 3694 self.expression(exp.Slice, expression=self._parse_conjunction()) 3695 ] 3696 else: 3697 expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction())) 3698 3699 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 3700 if bracket_kind == TokenType.L_BRACE: 3701 this = self.expression(exp.Struct, expressions=expressions) 3702 elif not this or this.name.upper() == "ARRAY": 3703 this = self.expression(exp.Array, expressions=expressions) 3704 else: 3705 expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET) 3706 this = self.expression(exp.Bracket, this=this, expressions=expressions) 3707 3708 if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET: 3709 self.raise_error("Expected ]") 3710 elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE: 3711 self.raise_error("Expected }") 3712 3713 self._add_comments(this) 3714 return self._parse_bracket(this) 3715 3716 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3717 if self._match(TokenType.COLON): 3718 return self.expression(exp.Slice, this=this, expression=self._parse_conjunction()) 3719 return this 3720 3721 def _parse_case(self) -> t.Optional[exp.Expression]: 3722 ifs = [] 3723 default = None 3724 3725 expression = self._parse_conjunction() 3726 3727 while self._match(TokenType.WHEN): 3728 this = self._parse_conjunction() 3729 self._match(TokenType.THEN) 3730 then = self._parse_conjunction() 3731 ifs.append(self.expression(exp.If, this=this, true=then)) 3732 3733 if self._match(TokenType.ELSE): 3734 default = self._parse_conjunction() 3735 3736 
if not self._match(TokenType.END): 3737 self.raise_error("Expected END after CASE", self._prev) 3738 3739 return self._parse_window( 3740 self.expression(exp.Case, this=expression, ifs=ifs, default=default) 3741 ) 3742 3743 def _parse_if(self) -> t.Optional[exp.Expression]: 3744 if self._match(TokenType.L_PAREN): 3745 args = self._parse_csv(self._parse_conjunction) 3746 this = self.validate_expression(exp.If.from_arg_list(args), args) 3747 self._match_r_paren() 3748 else: 3749 index = self._index - 1 3750 condition = self._parse_conjunction() 3751 3752 if not condition: 3753 self._retreat(index) 3754 return None 3755 3756 self._match(TokenType.THEN) 3757 true = self._parse_conjunction() 3758 false = self._parse_conjunction() if self._match(TokenType.ELSE) else None 3759 self._match(TokenType.END) 3760 this = self.expression(exp.If, this=condition, true=true, false=false) 3761 3762 return self._parse_window(this) 3763 3764 def _parse_extract(self) -> exp.Extract: 3765 this = self._parse_function() or self._parse_var() or self._parse_type() 3766 3767 if self._match(TokenType.FROM): 3768 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3769 3770 if not self._match(TokenType.COMMA): 3771 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3772 3773 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3774 3775 def _parse_any_value(self) -> exp.AnyValue: 3776 this = self._parse_lambda() 3777 is_max = None 3778 having = None 3779 3780 if self._match(TokenType.HAVING): 3781 self._match_texts(("MAX", "MIN")) 3782 is_max = self._prev.text == "MAX" 3783 having = self._parse_column() 3784 3785 return self.expression(exp.AnyValue, this=this, having=having, max=is_max) 3786 3787 def _parse_cast(self, strict: bool) -> exp.Expression: 3788 this = self._parse_conjunction() 3789 3790 if not self._match(TokenType.ALIAS): 3791 if self._match(TokenType.COMMA): 3792 return self.expression( 3793 
exp.CastToStrType, this=this, expression=self._parse_string() 3794 ) 3795 else: 3796 self.raise_error("Expected AS after CAST") 3797 3798 fmt = None 3799 to = self._parse_types() 3800 3801 if not to: 3802 self.raise_error("Expected TYPE after CAST") 3803 elif to.this == exp.DataType.Type.CHAR: 3804 if self._match(TokenType.CHARACTER_SET): 3805 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3806 elif self._match(TokenType.FORMAT): 3807 fmt_string = self._parse_string() 3808 fmt = self._parse_at_time_zone(fmt_string) 3809 3810 if to.this in exp.DataType.TEMPORAL_TYPES: 3811 this = self.expression( 3812 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 3813 this=this, 3814 format=exp.Literal.string( 3815 format_time( 3816 fmt_string.this if fmt_string else "", 3817 self.FORMAT_MAPPING or self.TIME_MAPPING, 3818 self.FORMAT_TRIE or self.TIME_TRIE, 3819 ) 3820 ), 3821 ) 3822 3823 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 3824 this.set("zone", fmt.args["zone"]) 3825 3826 return this 3827 3828 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt) 3829 3830 def _parse_concat(self) -> t.Optional[exp.Expression]: 3831 args = self._parse_csv(self._parse_conjunction) 3832 if self.CONCAT_NULL_OUTPUTS_STRING: 3833 args = [ 3834 exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string("")) 3835 for arg in args 3836 if arg 3837 ] 3838 3839 # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when 3840 # we find such a call we replace it with its argument. 
3841 if len(args) == 1: 3842 return args[0] 3843 3844 return self.expression( 3845 exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args 3846 ) 3847 3848 def _parse_string_agg(self) -> exp.Expression: 3849 if self._match(TokenType.DISTINCT): 3850 args: t.List[t.Optional[exp.Expression]] = [ 3851 self.expression(exp.Distinct, expressions=[self._parse_conjunction()]) 3852 ] 3853 if self._match(TokenType.COMMA): 3854 args.extend(self._parse_csv(self._parse_conjunction)) 3855 else: 3856 args = self._parse_csv(self._parse_conjunction) 3857 3858 index = self._index 3859 if not self._match(TokenType.R_PAREN): 3860 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3861 return self.expression( 3862 exp.GroupConcat, 3863 this=seq_get(args, 0), 3864 separator=self._parse_order(this=seq_get(args, 1)), 3865 ) 3866 3867 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 3868 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3869 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
3870 if not self._match_text_seq("WITHIN", "GROUP"): 3871 self._retreat(index) 3872 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 3873 3874 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3875 order = self._parse_order(this=seq_get(args, 0)) 3876 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3877 3878 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3879 this = self._parse_bitwise() 3880 3881 if self._match(TokenType.USING): 3882 to: t.Optional[exp.Expression] = self.expression( 3883 exp.CharacterSet, this=self._parse_var() 3884 ) 3885 elif self._match(TokenType.COMMA): 3886 to = self._parse_types() 3887 else: 3888 to = None 3889 3890 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3891 3892 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 3893 """ 3894 There are generally two variants of the DECODE function: 3895 3896 - DECODE(bin, charset) 3897 - DECODE(expression, search, result [, search, result] ... [, default]) 3898 3899 The second variant will always be parsed into a CASE expression. Note that NULL 3900 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3901 instead of relying on pattern matching. 
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: match either on equality or on both
                # sides being NULL (DECODE treats NULL = NULL as a match).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse one `[KEY] k [:|VALUE] v` pair inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.JSONObject:
        """Parse JSON_OBJECT(...) with its NULL handling, key-uniqueness and format clauses."""
        star = self._parse_star()
        expressions = [star] if star else self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Func:
        """Parse LOG(...), normalizing argument order and LN defaults per dialect flags."""
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MySQL's MATCH (cols) AGAINST ('expr' [modifier]) full-text predicate."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse T-SQL's OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, optional path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style calls; haystack_first flips the argument order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # ANSI form: POSITION(substr IN string).
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        """Parse SUBSTRING in both comma-separated and FROM/FOR forms."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        # Optional LEADING / TRAILING / BOTH qualifier.
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): the first expression was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        # WINDOW name AS (...) [, name AS (...)] at the end of a SELECT.
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        # FILTER (WHERE ...) may precede the OVER clause.
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name: a reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        # ROWS/RANGE frame specification.
        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        # One frame bound: UNBOUNDED / CURRENT ROW / <expr>, plus optional side
        # (e.g. PRECEDING/FOLLOWING, per WINDOW_SIDES).
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # When `explicit` is set, only accept an alias introduced by AS.
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        # Parenthesized multi-alias: expr AS (a, b, ...).
        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        # Fall back to any non-reserved token (or the given token set).
        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume the current token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Parameter:
        # `wrapped` marks the braced form, e.g. ${var}.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # Not actually a placeholder: give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_column)
        return self._parse_csv(self._parse_column)

    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.REPLACE):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)
        return self._parse_expressions()

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        # Parse a separator-delimited list, skipping None results.
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        # Left-associative fold over the binary operators in `expressions`.
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        # Parse parenthesized content; parens are required unless `optional`.
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_conjunction()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts({"TRANSACTION", "WORK"})

        # Comma-separated transaction modes; each mode may span several VAR tokens.
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        # The COMMIT/ROLLBACK keyword was consumed by the caller.
        is_rollback = self._prev.token_type == TokenType.ROLLBACK
        self._match_texts({"TRANSACTION", "WORK"})

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        # [AND [NO] CHAIN]
        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_add_constraint(self) -> exp.AddConstraint:
        this = None
        # The introducing token was consumed by the caller.
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)

    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
            return self._parse_csv(self._parse_add_constraint)

        # Not a constraint: rewind and parse as ADD COLUMN(s).
        self._retreat(index)
        return self._parse_csv(self._parse_add_column)

    def _parse_alter_table_alter(self) -> exp.AlterColumn:
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # ALTER COLUMN ... [SET DATA] TYPE ... [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )

    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not DROP PARTITION: rewind and parse as DROP COLUMN(s).
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> exp.RenameTable:
        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter(self) -> exp.AlterTable | exp.Command:
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))

            # Only a fully-consumed statement yields a structured AlterTable;
            # leftover tokens fall back to an opaque Command below.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        # WHEN [NOT] MATCHED [BY TARGET|SOURCE] [AND cond] THEN <action> ...
        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        self._advance()
        return self.expression(exp.Show, this=self._prev.text.upper())

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment: rewind so the caller can try another form.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(exp.EQ, this=left, expression=right)

        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            # Leftover tokens mean the SET didn't fully parse; emit a raw Command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]:
        # Try each (possibly multi-word) option in order.
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.var(option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        # Consume all remaining tokens and keep the raw SQL text.
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        # Optional nested (key value, ...) settings list.
        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            # MIN omitted: the single value is MAX and MIN defaults to 0.
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        # Walk the token stream through the trie of multi-word parser keys;
        # on a miss, restore the original position.
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Consume the current token if it has the given type.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Consume the current token if its type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Consume the next two tokens if they match the given pair of types.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Consume the current token if its upper-cased text is in `texts`.
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True):
        # Match a sequence of upper-cased token texts; rewinds fully on failure.
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    @t.overload
    def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression:
        ...

    @t.overload
    def _replace_columns_with_dots(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        ...

    def _replace_columns_with_dots(self, this):
        # Recursively rewrite Column nodes as Dot chains (table.column).
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this) if table else this.this
            )

        return this

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str]
    ) -> t.Optional[exp.Expression]:
        # Rewrite column references to lambda parameters as plain identifiers.
        if not node:
            return node

        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace the outermost enclosing Dot chain, if any.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument produces a StarMap; otherwise the arguments are
    read as key1, value1, key2, value2, ... (an odd count raises IndexError,
    as the trailing key has no value).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    key_positions = range(0, len(args), 2)
    keys = [args[i] for i in key_positions]
    values = [args[i + 1] for i in key_positions]

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
60class Parser(metaclass=_Parser): 61 """ 62 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 63 64 Args: 65 error_level: The desired error level. 66 Default: ErrorLevel.IMMEDIATE 67 error_message_context: Determines the amount of context to capture from a 68 query string when displaying the error message (in number of characters). 69 Default: 100 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 """ 74 75 FUNCTIONS: t.Dict[str, t.Callable] = { 76 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 77 "DATE_TO_DATE_STR": lambda args: exp.Cast( 78 this=seq_get(args, 0), 79 to=exp.DataType(this=exp.DataType.Type.TEXT), 80 ), 81 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 82 "LIKE": parse_like, 83 "TIME_TO_TIME_STR": lambda args: exp.Cast( 84 this=seq_get(args, 0), 85 to=exp.DataType(this=exp.DataType.Type.TEXT), 86 ), 87 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 88 this=exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 start=exp.Literal.number(1), 93 length=exp.Literal.number(10), 94 ), 95 "VAR_MAP": parse_var_map, 96 } 97 98 NO_PAREN_FUNCTIONS = { 99 TokenType.CURRENT_DATE: exp.CurrentDate, 100 TokenType.CURRENT_DATETIME: exp.CurrentDate, 101 TokenType.CURRENT_TIME: exp.CurrentTime, 102 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 103 TokenType.CURRENT_USER: exp.CurrentUser, 104 } 105 106 NESTED_TYPE_TOKENS = { 107 TokenType.ARRAY, 108 TokenType.MAP, 109 TokenType.NULLABLE, 110 TokenType.STRUCT, 111 } 112 113 ENUM_TYPE_TOKENS = { 114 TokenType.ENUM, 115 } 116 117 TYPE_TOKENS = { 118 TokenType.BIT, 119 TokenType.BOOLEAN, 120 TokenType.TINYINT, 121 TokenType.UTINYINT, 122 TokenType.SMALLINT, 123 TokenType.USMALLINT, 124 TokenType.INT, 125 TokenType.UINT, 126 TokenType.BIGINT, 127 
TokenType.UBIGINT, 128 TokenType.INT128, 129 TokenType.UINT128, 130 TokenType.INT256, 131 TokenType.UINT256, 132 TokenType.FLOAT, 133 TokenType.DOUBLE, 134 TokenType.CHAR, 135 TokenType.NCHAR, 136 TokenType.VARCHAR, 137 TokenType.NVARCHAR, 138 TokenType.TEXT, 139 TokenType.MEDIUMTEXT, 140 TokenType.LONGTEXT, 141 TokenType.MEDIUMBLOB, 142 TokenType.LONGBLOB, 143 TokenType.BINARY, 144 TokenType.VARBINARY, 145 TokenType.JSON, 146 TokenType.JSONB, 147 TokenType.INTERVAL, 148 TokenType.TIME, 149 TokenType.TIMESTAMP, 150 TokenType.TIMESTAMPTZ, 151 TokenType.TIMESTAMPLTZ, 152 TokenType.DATETIME, 153 TokenType.DATETIME64, 154 TokenType.DATE, 155 TokenType.INT4RANGE, 156 TokenType.INT4MULTIRANGE, 157 TokenType.INT8RANGE, 158 TokenType.INT8MULTIRANGE, 159 TokenType.NUMRANGE, 160 TokenType.NUMMULTIRANGE, 161 TokenType.TSRANGE, 162 TokenType.TSMULTIRANGE, 163 TokenType.TSTZRANGE, 164 TokenType.TSTZMULTIRANGE, 165 TokenType.DATERANGE, 166 TokenType.DATEMULTIRANGE, 167 TokenType.DECIMAL, 168 TokenType.BIGDECIMAL, 169 TokenType.UUID, 170 TokenType.GEOGRAPHY, 171 TokenType.GEOMETRY, 172 TokenType.HLLSKETCH, 173 TokenType.HSTORE, 174 TokenType.PSEUDO_TYPE, 175 TokenType.SUPER, 176 TokenType.SERIAL, 177 TokenType.SMALLSERIAL, 178 TokenType.BIGSERIAL, 179 TokenType.XML, 180 TokenType.UNIQUEIDENTIFIER, 181 TokenType.USERDEFINED, 182 TokenType.MONEY, 183 TokenType.SMALLMONEY, 184 TokenType.ROWVERSION, 185 TokenType.IMAGE, 186 TokenType.VARIANT, 187 TokenType.OBJECT, 188 TokenType.INET, 189 TokenType.IPADDRESS, 190 TokenType.IPPREFIX, 191 TokenType.ENUM, 192 *NESTED_TYPE_TOKENS, 193 } 194 195 SUBQUERY_PREDICATES = { 196 TokenType.ANY: exp.Any, 197 TokenType.ALL: exp.All, 198 TokenType.EXISTS: exp.Exists, 199 TokenType.SOME: exp.Any, 200 } 201 202 RESERVED_KEYWORDS = { 203 *Tokenizer.SINGLE_TOKENS.values(), 204 TokenType.SELECT, 205 } 206 207 DB_CREATABLES = { 208 TokenType.DATABASE, 209 TokenType.SCHEMA, 210 TokenType.TABLE, 211 TokenType.VIEW, 212 TokenType.DICTIONARY, 213 } 214 215 
    # Every object kind a CREATE / DROP / COMMENT statement can target
    CREATABLES = {
        TokenType.COLUMN,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IF,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.ORDINALITY,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.REFERENCES,
        TokenType.RIGHT,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.UNIQUE,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }

    # END terminates an interval expression, so it can't name an interval unit
    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    # Tokens usable as a table alias; join/set-modifier keywords are excluded
    # because they would make `FROM t alias` ambiguous
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.WINDOW,
    }

    # In COMMENT ON TABLE t IS ..., IS must terminate the table reference
    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    # In UPDATE t alias SET ..., SET must terminate the alias
    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    # Tokens that may be followed by a parenthesized argument list, i.e. can
    # appear in function-call position
    FUNC_TOKENS = {
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    # The tables below map operator tokens to the expression node each binary
    # parsing level builds; they define the precedence-climbing hierarchy.
    CONJUNCTION = {
        TokenType.AND: exp.And,
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    # NOTE: || (DPIPE) is parsed at the bitwise level alongside & ^ |
    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
        TokenType.DPIPE: exp.DPipe,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    TIMESTAMPS = {
        TokenType.TIME,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.NATURAL,
        TokenType.ASOF,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.CROSS,
        TokenType.SEMI,
        TokenType.ANTI,
    }

    JOIN_HINTS: t.Set[str] = set()

    # Builders for lambda-like constructs: `args -> body` and `arg => value` (kwarg)
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_conjunction(),
                {node.name for node in expressions},
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_conjunction(),
        ),
    }

    # Postfix operators that attach to a column-like expression; DOT maps to
    # None because member access is handled specially by the column parser
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=path,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    # Entry points used by parse_into: target Expression type -> parse method
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_conjunction(),
        exp.DataType: lambda self: self._parse_types(),
        exp.Expression: lambda self: self._parse_statement(),
        exp.From: lambda self: self._parse_from(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    # Top-level statement dispatch: first token -> statement parser
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        # A bare FROM is treated as SELECT * FROM ...
        TokenType.FROM: lambda self: exp.select("*").from_(
            t.cast(exp.From, self._parse_from(skip_from_token=True))
        ),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
            and exp.var(self._prev.text),
            this=self._parse_table(schema=False),
        ),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
    }

    # Literal/atom dispatch; each parser receives the already-consumed token
    PRIMARY_PARSERS = {
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
        # "except" is a Python keyword, so the kwargs must be passed via a dict
        TokenType.STAR: lambda self, _: self.expression(
            exp.Star, **{"except": self._parse_except(), "replace": self._parse_replace()}
        ),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        # :name / :1 style placeholders; returns None if nothing follows the colon
        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
        if self._match_set((TokenType.NUMBER, TokenType.VAR))
        else None,
    }

    # Range/predicate operators parsed after comparison, e.g. BETWEEN, IN, LIKE
    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
    }

    # DDL property keyword -> property parser (used while parsing CREATE etc.)
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARACTER SET": lambda self: self._parse_character_set(),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DEFINER": lambda self: self._parse_definer(),
        # DETERMINISTIC is normalized to the IMMUTABLE stability level
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "RETURNS": lambda self: self._parse_returns(),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
        ),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column-constraint keyword -> constraint parser
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function>; `and` short-circuits to False if UPDATE is absent
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE action keyword -> parser
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs without (or with optional) parentheses, e.g. CASE
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    # Functions whose argument lists need bespoke parsing (special syntax)
    FUNCTION_PARSERS = {
        "ANY_VALUE": lambda self: self._parse_any_value(),
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONCAT": lambda self: self._parse_concat(),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }

    # Each parser returns an (arg_name, node) pair consumed by the query-modifier
    # loop; note FETCH shares the "limit" slot with LIMIT
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    }

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty by default; dialects populate these (tries are built by the metaclass)
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Whether :: casts are strict (exp.Cast) or lenient (exp.TryCast)
    STRICT_CAST = True

    # A NULL arg in CONCAT yields NULL by default
    CONCAT_NULL_OUTPUTS_STRING = False

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    INDEX_OFFSET: int = 0
    UNNEST_COLUMN_ONLY: bool = False
    ALIAS_POST_TABLESAMPLE: bool = False
    STRICT_STRING_CONCAT = False
    NULL_ORDERING: str = "nulls_are_small"
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}
    FORMAT_MAPPING: t.Dict[str, str] = {}
    FORMAT_TRIE: t.Dict = {}
    TIME_MAPPING: t.Dict[str, str] = {}
    TIME_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
    ):
        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.reset()

    def reset(self) -> None:
        """Clears all parsing state so the instance can be reused."""
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.

        Raises:
            TypeError: If no parser is registered for a requested expression type.
            ParseError: If the token list could not be parsed into any of the given types.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all collected errors at once
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Splits the token stream on semicolons and applies parse_method per statement."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A trailing semicolon must not open an empty chunk
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement was not fully consumed
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # The \033[4m / \033[0m escapes underline the offending span in terminals
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        # Side-effect-only conditional: attach explicit comments if given,
        # otherwise inherit any comments pending from the previous token
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        # Transfers buffered token comments onto the expression, then clears them
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        # Slice of the original SQL text spanned by the two tokens, inclusive
        return self.sql[start.start : end.end + 1]

    def _advance(self, times: int = 1) -> None:
        """Moves the cursor forward, refreshing _curr/_next/_prev and pending comments."""
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        """Moves the cursor back to the given absolute index (no-op if already there)."""
        if index != self._index:
            self._advance(index - self._index)

    def _parse_command(self) -> exp.Command:
        # Fallback: wrap the rest of the statement as an opaque command
        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parses COMMENT ON <kind> <object> IS <string>."""
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            # Unknown object kind: keep the statement as a raw command
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse MergeTree TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # Parses one TTL entry: <expr> [DELETE | RECOMPRESS ... | TO DISK ... | TO VOLUME ...]
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses a single statement; returns None at end of input."""
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Fall through: a bare expression or a SELECT query
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown object kind: keep the statement as a raw command
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            comments=start.comments,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; short-circuits so nothing is consumed past a miss
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev
        replace = start.text.upper() == "REPLACE" or self._match_pair(
            TokenType.OR, TokenType.REPLACE
        )
        unique = self._match(TokenType.UNIQUE)

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        exists = self._parse_exists(not_=True)
        this = None
        expression = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulates property lists parsed at different clause positions
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION,
TokenType.PROCEDURE): 1207 this = self._parse_user_defined_function(kind=create_token.token_type) 1208 1209 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1210 extend_props(self._parse_properties()) 1211 1212 self._match(TokenType.ALIAS) 1213 begin = self._match(TokenType.BEGIN) 1214 return_ = self._match_text_seq("RETURN") 1215 expression = self._parse_statement() 1216 1217 if return_: 1218 expression = self.expression(exp.Return, this=expression) 1219 elif create_token.token_type == TokenType.INDEX: 1220 this = self._parse_index(index=self._parse_id_var()) 1221 elif create_token.token_type in self.DB_CREATABLES: 1222 table_parts = self._parse_table_parts(schema=True) 1223 1224 # exp.Properties.Location.POST_NAME 1225 self._match(TokenType.COMMA) 1226 extend_props(self._parse_properties(before=True)) 1227 1228 this = self._parse_schema(this=table_parts) 1229 1230 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1231 extend_props(self._parse_properties()) 1232 1233 self._match(TokenType.ALIAS) 1234 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1235 # exp.Properties.Location.POST_ALIAS 1236 extend_props(self._parse_properties()) 1237 1238 expression = self._parse_ddl_select() 1239 1240 if create_token.token_type == TokenType.TABLE: 1241 # exp.Properties.Location.POST_EXPRESSION 1242 extend_props(self._parse_properties()) 1243 1244 indexes = [] 1245 while True: 1246 index = self._parse_index() 1247 1248 # exp.Properties.Location.POST_INDEX 1249 extend_props(self._parse_properties()) 1250 1251 if not index: 1252 break 1253 else: 1254 self._match(TokenType.COMMA) 1255 indexes.append(index) 1256 elif create_token.token_type == TokenType.VIEW: 1257 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1258 no_schema_binding = True 1259 1260 if self._match_text_seq("CLONE"): 1261 clone = self._parse_table(schema=True) 1262 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1263 clone_kind = ( 
1264 self._match(TokenType.L_PAREN) 1265 and self._match_texts(self.CLONE_KINDS) 1266 and self._prev.text.upper() 1267 ) 1268 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1269 self._match(TokenType.R_PAREN) 1270 clone = self.expression( 1271 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1272 ) 1273 1274 return self.expression( 1275 exp.Create, 1276 this=this, 1277 kind=create_token.text, 1278 replace=replace, 1279 unique=unique, 1280 expression=expression, 1281 exists=exists, 1282 properties=properties, 1283 indexes=indexes, 1284 no_schema_binding=no_schema_binding, 1285 begin=begin, 1286 clone=clone, 1287 ) 1288 1289 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1290 # only used for teradata currently 1291 self._match(TokenType.COMMA) 1292 1293 kwargs = { 1294 "no": self._match_text_seq("NO"), 1295 "dual": self._match_text_seq("DUAL"), 1296 "before": self._match_text_seq("BEFORE"), 1297 "default": self._match_text_seq("DEFAULT"), 1298 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1299 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1300 "after": self._match_text_seq("AFTER"), 1301 "minimum": self._match_texts(("MIN", "MINIMUM")), 1302 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1303 } 1304 1305 if self._match_texts(self.PROPERTY_PARSERS): 1306 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1307 try: 1308 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1309 except TypeError: 1310 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1311 1312 return None 1313 1314 def _parse_property(self) -> t.Optional[exp.Expression]: 1315 if self._match_texts(self.PROPERTY_PARSERS): 1316 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1317 1318 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1319 return self._parse_character_set(default=True) 1320 1321 if self._match_text_seq("COMPOUND", "SORTKEY"): 1322 return 
self._parse_sortkey(compound=True) 1323 1324 if self._match_text_seq("SQL", "SECURITY"): 1325 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1326 1327 assignment = self._match_pair( 1328 TokenType.VAR, TokenType.EQ, advance=False 1329 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1330 1331 if assignment: 1332 key = self._parse_var_or_string() 1333 self._match(TokenType.EQ) 1334 return self.expression(exp.Property, this=key, value=self._parse_column()) 1335 1336 return None 1337 1338 def _parse_stored(self) -> exp.FileFormatProperty: 1339 self._match(TokenType.ALIAS) 1340 1341 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1342 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1343 1344 return self.expression( 1345 exp.FileFormatProperty, 1346 this=self.expression( 1347 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1348 ) 1349 if input_format or output_format 1350 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1351 ) 1352 1353 def _parse_property_assignment(self, exp_class: t.Type[E]) -> E: 1354 self._match(TokenType.EQ) 1355 self._match(TokenType.ALIAS) 1356 return self.expression(exp_class, this=self._parse_field()) 1357 1358 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 1359 properties = [] 1360 while True: 1361 if before: 1362 prop = self._parse_property_before() 1363 else: 1364 prop = self._parse_property() 1365 1366 if not prop: 1367 break 1368 for p in ensure_list(prop): 1369 properties.append(p) 1370 1371 if properties: 1372 return self.expression(exp.Properties, expressions=properties) 1373 1374 return None 1375 1376 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 1377 return self.expression( 1378 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1379 ) 1380 1381 
    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by a pre-volatile
        token, otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_with_property(
        self,
    ) -> t.Optional[exp.Expression] | t.List[t.Optional[exp.Expression]]:
        """Parse the various WITH <property> forms (wrapped list, JOURNAL, DATA, ...)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_property)

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = {ON | OFF | DEFAULT}; `on` stays None when neither ON nor OFF matched.
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self) -> exp.Cluster:
        return self.expression(exp.Cluster, expressions=self._parse_csv(self._parse_ordered))

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # Not COPY GRANTS: give back the already-consumed COPY token.
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty:
        # WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR {ALL | INSERT | NONE}]
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse LOCKING <kind> [<name>] {FOR | IN} <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_no_property(self) -> t.Optional[exp.NoPrimaryIndexProperty]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        elif self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse `LIKE <table> [{INCLUDING | EXCLUDING} <option>]...`."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a RETURNS clause: a scalar type, TABLE<...>, or TABLE(...) schema."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()
        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Insert:
        """Parse INSERT [OVERWRITE | IGNORE | OR <alt>] [INTO] {DIRECTORY ... | <table>} ..."""
        comments = ensure_list(self._prev_comments)
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_conjunction(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse `ON CONFLICT ...` / `ON DUPLICATE KEY ...` clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse Hive-style ROW FORMAT SERDE / ROW FORMAT DELIMITED clauses."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = None
            if self._match(TokenType.SERDE_PROPERTIES):
                serde_properties = self.expression(
                    exp.SerdeProperties, expressions=self._parse_wrapped_csv(self._parse_property)
                )

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse Hive `LOAD DATA [LOCAL] INPATH ... [OVERWRITE] INTO TABLE ...`."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        comments = self._prev_comments
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            comments=comments,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        comments = self._prev_comments
        this = self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            comments=comments,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse `CACHE [LAZY] TABLE <t> [OPTIONS('k' = 'v')] [AS <select>]`."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Tuple:
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: a CTE-prefixed statement, a SELECT, a
        parenthesized subquery (when `nested`/`table`), or a VALUES clause."""
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery-style SELECT AS STRUCT / AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)
            expressions = self._parse_expressions()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            if self._match(TokenType.PIVOT):
                this = self._parse_simplified_pivot()
            elif self._match(TokenType.FROM):
                this = exp.select("*").from_(
                    t.cast(exp.From, self._parse_from(skip_from_token=True))
                )
            else:
                this = self._parse_table() if table else self._parse_select(nested=True)
                this = self._parse_set_operations(self._parse_query_modifiers(this))

            self._match_r_paren()

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.CTE:
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.CTE, this=self._parse_wrapped(self._parse_statement), alias=alias
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse `[AS] <alias> [(col, ...)]`; returns None when nothing matched."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If no column list followed the paren, rewind — it wasn't an alias column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach joins, laterals and trailing modifiers (WHERE, GROUP BY, LIMIT, ...)
        to `this` when it is a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in iter(self._parse_join, None):
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT parsed with an embedded offset is split into
                            # separate limit/offset args on the query node.
                            offset = expression.args.pop("offset", None)
                            if offset:
                                this.set("offset", exp.Offset(expression=offset))
                        continue
                break
        return this

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT):
            hints = []
            for hint in iter(lambda: self._parse_csv(self._parse_function), []):
                hints.extend(hint)

            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")

            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self, joins: bool = False, skip_from_token: bool = False
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins)
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = self._parse_expressions() if self._match_text_seq("MEASURES") else None

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text by scanning tokens until
            # the parentheses balance out.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        # Handles LATERAL ... as well as the OUTER APPLY / CROSS APPLY forms.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery: the lateral target is an unnest, function call, or
            # (possibly dotted) identifier.
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        if view:
            # LATERAL VIEW (Hive/Spark): the alias is "table AS col1, col2, ...".
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, exp.Subquery) and this.alias:
            # Ensures parity between the Subquery's and the Lateral's "alias" args
            table_alias = this.args["alias"].copy()
        else:
            table_alias = self._parse_table_alias()

        return self.expression(exp.Lateral, this=this, view=view, outer=outer, alias=table_alias)

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join, each None if absent."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single join clause (comma join, JOIN, or CROSS/OUTER APPLY)."""
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The method/side/kind tokens did not precede an actual JOIN; rewind.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY behaves like a LEFT join for downstream generation.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()
        elif not (kind and kind.token_type == TokenType.CROSS):
            # Nested joins: consume trailing joins only if they are followed by
            # this join's own ON/USING; otherwise rewind and leave them alone.
            index = self._index
            joins = self._parse_joins()

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_conjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_wrapped_id_vars()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins)

        return self.expression(exp.Join, **kwargs)

    def _parse_index(
        self,
        index: t.Optional[exp.Expression] = None,
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; when `index` is given, only the ON <table> part."""
        if index:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        using = self._parse_field() if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_ordered)
        else:
            columns = None

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            using=using,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
            partition_by=self._parse_partition_by(),
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints; None if neither."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_texts({"INDEX", "KEY"})
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Table:
        """Parse a possibly qualified table name: [catalog.][db.]table[.more...]."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
2446 ) -> t.Optional[exp.Expression]: 2447 lateral = self._parse_lateral() 2448 if lateral: 2449 return lateral 2450 2451 unnest = self._parse_unnest() 2452 if unnest: 2453 return unnest 2454 2455 values = self._parse_derived_table_values() 2456 if values: 2457 return values 2458 2459 subquery = self._parse_select(table=True) 2460 if subquery: 2461 if not subquery.args.get("pivots"): 2462 subquery.set("pivots", self._parse_pivots()) 2463 return subquery 2464 2465 bracket = parse_bracket and self._parse_bracket(None) 2466 bracket = self.expression(exp.Table, this=bracket) if bracket else None 2467 this: exp.Expression = bracket or self._parse_table_parts(schema=schema) 2468 2469 if schema: 2470 return self._parse_schema(this=this) 2471 2472 if self.ALIAS_POST_TABLESAMPLE: 2473 table_sample = self._parse_table_sample() 2474 2475 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2476 if alias: 2477 this.set("alias", alias) 2478 2479 if not this.args.get("pivots"): 2480 this.set("pivots", self._parse_pivots()) 2481 2482 this.set("hints", self._parse_table_hints()) 2483 2484 if not self.ALIAS_POST_TABLESAMPLE: 2485 table_sample = self._parse_table_sample() 2486 2487 if table_sample: 2488 table_sample.set("this", this) 2489 this = table_sample 2490 2491 if joins: 2492 for join in iter(self._parse_join, None): 2493 this.append("joins", join) 2494 2495 return this 2496 2497 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 2498 if not self._match(TokenType.UNNEST): 2499 return None 2500 2501 expressions = self._parse_wrapped_csv(self._parse_type) 2502 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2503 2504 alias = self._parse_table_alias() if with_alias else None 2505 2506 if alias and self.UNNEST_COLUMN_ONLY: 2507 if alias.args.get("columns"): 2508 self.raise_error("Unexpected extra column alias in unnest.") 2509 2510 alias.set("columns", [alias.this]) 2511 alias.set("this", None) 2512 
2513 offset = None 2514 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2515 self._match(TokenType.ALIAS) 2516 offset = self._parse_id_var() or exp.to_identifier("offset") 2517 2518 return self.expression( 2519 exp.Unnest, expressions=expressions, ordinality=ordinality, alias=alias, offset=offset 2520 ) 2521 2522 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 2523 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2524 if not is_derived and not self._match(TokenType.VALUES): 2525 return None 2526 2527 expressions = self._parse_csv(self._parse_value) 2528 alias = self._parse_table_alias() 2529 2530 if is_derived: 2531 self._match_r_paren() 2532 2533 return self.expression( 2534 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 2535 ) 2536 2537 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 2538 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2539 as_modifier and self._match_text_seq("USING", "SAMPLE") 2540 ): 2541 return None 2542 2543 bucket_numerator = None 2544 bucket_denominator = None 2545 bucket_field = None 2546 percent = None 2547 rows = None 2548 size = None 2549 seed = None 2550 2551 kind = ( 2552 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2553 ) 2554 method = self._parse_var(tokens=(TokenType.ROW,)) 2555 2556 self._match(TokenType.L_PAREN) 2557 2558 num = self._parse_number() 2559 2560 if self._match_text_seq("BUCKET"): 2561 bucket_numerator = self._parse_number() 2562 self._match_text_seq("OUT", "OF") 2563 bucket_denominator = bucket_denominator = self._parse_number() 2564 self._match(TokenType.ON) 2565 bucket_field = self._parse_field() 2566 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2567 percent = num 2568 elif self._match(TokenType.ROWS): 2569 rows = num 2570 else: 2571 size = num 2572 2573 self._match(TokenType.R_PAREN) 2574 2575 if self._match(TokenType.L_PAREN): 2576 method = 
self._parse_var() 2577 seed = self._match(TokenType.COMMA) and self._parse_number() 2578 self._match_r_paren() 2579 elif self._match_texts(("SEED", "REPEATABLE")): 2580 seed = self._parse_wrapped(self._parse_number) 2581 2582 return self.expression( 2583 exp.TableSample, 2584 method=method, 2585 bucket_numerator=bucket_numerator, 2586 bucket_denominator=bucket_denominator, 2587 bucket_field=bucket_field, 2588 percent=percent, 2589 rows=rows, 2590 size=size, 2591 seed=seed, 2592 kind=kind, 2593 ) 2594 2595 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 2596 return list(iter(self._parse_pivot, None)) or None 2597 2598 def _parse_joins(self) -> t.Optional[t.List[exp.Join]]: 2599 return list(iter(self._parse_join, None)) or None 2600 2601 # https://duckdb.org/docs/sql/statements/pivot 2602 def _parse_simplified_pivot(self) -> exp.Pivot: 2603 def _parse_on() -> t.Optional[exp.Expression]: 2604 this = self._parse_bitwise() 2605 return self._parse_in(this) if self._match(TokenType.IN) else this 2606 2607 this = self._parse_table() 2608 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 2609 using = self._match(TokenType.USING) and self._parse_csv( 2610 lambda: self._parse_alias(self._parse_function()) 2611 ) 2612 group = self._parse_group() 2613 return self.expression( 2614 exp.Pivot, this=this, expressions=expressions, using=using, group=group 2615 ) 2616 2617 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 2618 index = self._index 2619 2620 if self._match(TokenType.PIVOT): 2621 unpivot = False 2622 elif self._match(TokenType.UNPIVOT): 2623 unpivot = True 2624 else: 2625 return None 2626 2627 expressions = [] 2628 field = None 2629 2630 if not self._match(TokenType.L_PAREN): 2631 self._retreat(index) 2632 return None 2633 2634 if unpivot: 2635 expressions = self._parse_csv(self._parse_column) 2636 else: 2637 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2638 2639 if not expressions: 2640 
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value, alias=True)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names produced by the pivot, combining
            # each aggregation alias with each IN-list value name.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the alias of each pivot aggregation (dialects may override)."""
        return [agg.alias for agg in aggregations]

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; `skip_where_token` when WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with expressions, GROUPING SETS, ROLLUP, CUBE and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        if self._match(TokenType.ALL):
            return self.expression(exp.Group, all=True)

        # Loop because the grouping constructs can be interleaved, e.g.
        # GROUP BY a, ROLLUP (b), c, CUBE (d); stop once none of them matched.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                # WITH ROLLUP has no column list; plain ROLLUP takes a wrapped one.
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... )."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause attached to `this`; returns `this` unchanged if absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Ordered:
        """Parse one ORDER BY term with ASC/DESC and NULLS FIRST/LAST handling."""
        this = self._parse_conjunction()
        self._match(TokenType.ASC)

        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # If null ordering was not spelled out, infer it from the dialect's
        # NULL_ORDERING setting so transpilation stays deterministic.
        if (
            not explicitly_null_ordered
            and (
                (asc and self.NULL_ORDERING == "nulls_are_small")
                or (desc and self.NULL_ORDERING != "nulls_are_small")
            )
            and self.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT [offset,] n, TOP (n), or FETCH FIRST/NEXT n ROWS."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_number()

                if limit_paren:
                    self._match_r_paren()
            else:
                expression = self._parse_term()

            # MySQL-style "LIMIT offset, count".
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit, this=this, expression=expression, offset=offset, comments=comments
            )

            return limit_exp
        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match_text_seq("ONLY")
            with_ties = self._match_text_seq("WITH", "TIES")

            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] attached to `this`."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing locking clauses: FOR UPDATE / FOR SHARE / LOCK IN SHARE MODE,
        with optional OF <tables> and NOWAIT / WAIT n / SKIP LOCKED."""
        locks = []
        while True:
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # wait encodes three states: True=NOWAIT, False=SKIP LOCKED,
            # an expression=WAIT n, None=unspecified.
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(exp.Lock, update=update, expressions=expressions, wait=wait)
            )

        return locks

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; recursion makes them right-associative."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL was given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression with an optional alias."""
        return self._parse_alias(self._parse_conjunction())

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, ...), NOT, ISNULL/NOTNULL and IS."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of IS [NOT] {DISTINCT FROM ... | NULL | TRUE | FALSE}."""
        # Rewind target includes the IS token itself, already consumed by the caller.
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the tail of an IN predicate: UNNEST(...), (list|subquery), or a field."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single subquery is stored under "query", plain values under "expressions".
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Between:
        """Parse the tail of a BETWEEN predicate: <low> AND <high>."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low,
high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an Escape node if an ESCAPE clause follows."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Interval]:
        """Parse an INTERVAL literal, normalizing it to the INTERVAL '<n>' <unit> form."""
        if not self._match(TokenType.INTERVAL):
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.name)
        elif this and this.is_string:
            parts = this.name.split()

            if len(parts) == 2:
                if unit:
                    # this is not actually a unit, it's something else
                    unit = None
                    self._retreat(self._index - 1)
                else:
                    this = exp.Literal.string(parts[0])
                    unit = self.expression(exp.Var, this=parts[1])

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> spelled as paired < < / > > tokens."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type], this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, then a typed expression with optional AT TIME ZONE."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may be prefixed by a data type (an implicit cast),
        e.g. DATE '2023-01-01'; falls back to a plain column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name followed by a non-literal: treat it as a column, not a cast.
                self._retreat(index)
                return self._parse_column()
            return self._parse_column_ops(data_type)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeSize]:
        """Parse one size parameter of a type, e.g. the `10` in VARCHAR(10)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), parameterized and
        bracketed array forms; returns None (after rewinding) when not a type."""
        index = self._index

        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_primary)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parameterized type after all — could be a function call; rewind.
                self._retreat(index)
                return None

            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # type[] (possibly multi-dimensional) => nested ARRAY data types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[
                    exp.DataType(
                        this=exp.DataType.Type[type_token.value],
                        expressions=expressions,
                        nested=nested,
                    )
                ],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(this=exp.DataType.Type.ARRAY, expressions=[this], nested=True)

            return this

        if self._match(TokenType.L_BRACKET):
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(check_func=check_func, schema=schema)
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                value = 
exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 3160 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 3161 maybe_func = False 3162 elif type_token == TokenType.INTERVAL: 3163 unit = self._parse_var() 3164 3165 if not unit: 3166 value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 3167 else: 3168 value = self.expression(exp.Interval, unit=unit) 3169 3170 if maybe_func and check_func: 3171 index2 = self._index 3172 peek = self._parse_string() 3173 3174 if not peek: 3175 self._retreat(index) 3176 return None 3177 3178 self._retreat(index2) 3179 3180 if value: 3181 return value 3182 3183 return exp.DataType( 3184 this=exp.DataType.Type[type_token.value], 3185 expressions=expressions, 3186 nested=nested, 3187 values=values, 3188 prefix=prefix, 3189 ) 3190 3191 def _parse_struct_types(self) -> t.Optional[exp.Expression]: 3192 this = self._parse_type() or self._parse_id_var() 3193 self._match(TokenType.COLON) 3194 return self._parse_column_def(this) 3195 3196 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3197 if not self._match_text_seq("AT", "TIME", "ZONE"): 3198 return this 3199 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 3200 3201 def _parse_column(self) -> t.Optional[exp.Expression]: 3202 this = self._parse_field() 3203 if isinstance(this, exp.Identifier): 3204 this = self.expression(exp.Column, this=this) 3205 elif not this: 3206 return self._parse_bracket(this) 3207 return self._parse_column_ops(this) 3208 3209 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3210 this = self._parse_bracket(this) 3211 3212 while self._match_set(self.COLUMN_OPERATORS): 3213 op_token = self._prev.token_type 3214 op = self.COLUMN_OPERATORS.get(op_token) 3215 3216 if op_token == TokenType.DCOLON: 3217 field = self._parse_types() 3218 if not field: 3219 self.raise_error("Expected type") 3220 elif op and self._curr: 
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = self._parse_field(anonymous_func=True, any_token=True)

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the existing parts one qualifier slot over so that
                # `field` becomes the new column name.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, or a parenthesized expression/subquery."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are implicitly concatenated.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.5`.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_expressions()

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)

            self._match_r_paren(expression=this)
            return this

        return None

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function(anonymous=anonymous_func)
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a function call, dispatching to the registered function parsers.

        Args:
            functions: Optional override for the name -> builder mapping
                (defaults to self.FUNCTIONS).
            anonymous: When True, always build an exp.Anonymous node instead of
                a typed function expression.
            optional_parens: Allow functions callable without parentheses
                (e.g. CURRENT_DATE).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if optional_parens and self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        # Skip past the function name and the opening parenthesis.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)

            alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            if function and not anonymous:
                this = self.validate_expression(function(args), args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match(TokenType.R_PAREN, expression=this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse a single parameter in a function definition (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and its optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a character-set introducer; falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as `kind.name`."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, falling back to an ordinary expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all: rewind and parse a regular expression instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column / constraint definitions) attached to `this`."""
        index = self._index

        if not self.errors:
            try:
                # A nested SELECT here means there is no schema to parse.
                if self._parse_select(nested=True):
                    return this
            except ParseError:
                pass
            finally:
                self.errors.clear()
                self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )

        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name plus optional type and constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) arguments."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint, wrapped or bare."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.GeneratedAsIdentityColumnConstraint:
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS IDENTITY with its sequence options."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)
        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: the parentheses hold a generation expression.
                this.set("expression", self._parse_bitwise())

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(
        self,
    ) -> t.Optional[exp.NotNullColumnConstraint | exp.CaseSpecificColumnConstraint]:
        """Parse constraints introduced by NOT: NOT NULL or NOT CASESPECIFIC."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a named table constraint, or fall back to an unnamed one."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an unnamed constraint using the registered constraint parsers."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY] constraint with an optional column list."""
        self._match_text_seq("KEY")
        return self.expression(
            exp.UniqueColumnConstraint, this=self._parse_schema(self._parse_id_var(any_token=False))
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key constraint options (ON <event> actions, etc.) as strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause: target table plus key constraint options."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse a FOREIGN KEY constraint, including ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as either a column constraint or a table-level key."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field, optional=wrapped_optional)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse bracket/brace suffixes: subscripts, array literals and struct literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type

        if self._match(TokenType.COLON):
            expressions: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Slice, expression=self._parse_conjunction())
            ]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.INDEX_OFFSET)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in a Slice if a colon follows."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression with WHEN/THEN branches and an optional ELSE."""
        ifs = []
        default = None

        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in both function-call and IF ... THEN ... [ELSE ...] END forms."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Extract:
        """Parse EXTRACT(part FROM expr); a comma is tolerated in place of FROM."""
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_any_value(self) -> exp.AnyValue:
        """Parse ANY_VALUE(expr [HAVING MAX|MIN expr])."""
        this = self._parse_lambda()
        is_max = None
        having = None

        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            is_max = self._prev.text == "MAX"
            having = self._parse_column()

        return self.expression(exp.AnyValue, this=this, having=having, max=is_max)

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt]).

        Args:
            strict: Build an exp.Cast when True, otherwise an exp.TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: CAST(expr, 'type string').
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())
        elif self._match(TokenType.FORMAT):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Rewrite a temporal CAST ... FORMAT into StrToDate / StrToTime,
                # translating the format via the dialect's time mappings.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.FORMAT_MAPPING or self.TIME_MAPPING,
                            self.FORMAT_TRIE or self.TIME_TRIE,
                        )
                    ),
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])

                return this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, format=fmt)

    def _parse_concat(self) -> t.Optional[exp.Expression]:
        """Parse CONCAT arguments, normalizing NULL handling per dialect settings."""
        args = self._parse_csv(self._parse_conjunction)
        if self.CONCAT_NULL_OUTPUTS_STRING:
            # Treat NULL arguments as empty strings by wrapping them in COALESCE.
            args = [
                exp.func("COALESCE", exp.cast(arg, "text"), exp.Literal.string(""))
                for arg in args
                if arg
            ]

        # Some dialects (e.g. Trino) don't allow a single-argument CONCAT call, so when
        # we find such a call we replace it with its argument.
        if len(args) == 1:
            return args[0]

        return self.expression(
            exp.Concat if self.STRICT_STRING_CONCAT else exp.SafeConcat, expressions=args
        )

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregation arguments."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_conjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_conjunction))
        else:
            args = self._parse_csv(self._parse_conjunction)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            return self.expression(
                exp.GroupConcat,
                this=seq_get(args, 0),
                separator=self._parse_order(this=seq_get(args, 1)),
            )

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=seq_get(args, 0))
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type).

        Args:
            strict: Build an exp.Cast when True, otherwise an exp.TryCast.
        """
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
3903 """ 3904 args = self._parse_csv(self._parse_conjunction) 3905 3906 if len(args) < 3: 3907 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3908 3909 expression, *expressions = args 3910 if not expression: 3911 return None 3912 3913 ifs = [] 3914 for search, result in zip(expressions[::2], expressions[1::2]): 3915 if not search or not result: 3916 return None 3917 3918 if isinstance(search, exp.Literal): 3919 ifs.append( 3920 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3921 ) 3922 elif isinstance(search, exp.Null): 3923 ifs.append( 3924 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3925 ) 3926 else: 3927 cond = exp.or_( 3928 exp.EQ(this=expression.copy(), expression=search), 3929 exp.and_( 3930 exp.Is(this=expression.copy(), expression=exp.Null()), 3931 exp.Is(this=search.copy(), expression=exp.Null()), 3932 copy=False, 3933 ), 3934 copy=False, 3935 ) 3936 ifs.append(exp.If(this=cond, true=result)) 3937 3938 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3939 3940 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 3941 self._match_text_seq("KEY") 3942 key = self._parse_field() 3943 self._match(TokenType.COLON) 3944 self._match_text_seq("VALUE") 3945 value = self._parse_field() 3946 3947 if not key and not value: 3948 return None 3949 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3950 3951 def _parse_json_object(self) -> exp.JSONObject: 3952 star = self._parse_star() 3953 expressions = [star] if star else self._parse_csv(self._parse_json_key_value) 3954 3955 null_handling = None 3956 if self._match_text_seq("NULL", "ON", "NULL"): 3957 null_handling = "NULL ON NULL" 3958 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3959 null_handling = "ABSENT ON NULL" 3960 3961 unique_keys = None 3962 if self._match_text_seq("WITH", "UNIQUE"): 3963 unique_keys = True 3964 elif 
self._match_text_seq("WITHOUT", "UNIQUE"): 3965 unique_keys = False 3966 3967 self._match_text_seq("KEYS") 3968 3969 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3970 format_json = self._match_text_seq("FORMAT", "JSON") 3971 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3972 3973 return self.expression( 3974 exp.JSONObject, 3975 expressions=expressions, 3976 null_handling=null_handling, 3977 unique_keys=unique_keys, 3978 return_type=return_type, 3979 format_json=format_json, 3980 encoding=encoding, 3981 ) 3982 3983 def _parse_logarithm(self) -> exp.Func: 3984 # Default argument order is base, expression 3985 args = self._parse_csv(self._parse_range) 3986 3987 if len(args) > 1: 3988 if not self.LOG_BASE_FIRST: 3989 args.reverse() 3990 return exp.Log.from_arg_list(args) 3991 3992 return self.expression( 3993 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3994 ) 3995 3996 def _parse_match_against(self) -> exp.MatchAgainst: 3997 expressions = self._parse_csv(self._parse_column) 3998 3999 self._match_text_seq(")", "AGAINST", "(") 4000 4001 this = self._parse_string() 4002 4003 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 4004 modifier = "IN NATURAL LANGUAGE MODE" 4005 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4006 modifier = f"{modifier} WITH QUERY EXPANSION" 4007 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 4008 modifier = "IN BOOLEAN MODE" 4009 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 4010 modifier = "WITH QUERY EXPANSION" 4011 else: 4012 modifier = None 4013 4014 return self.expression( 4015 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 4016 ) 4017 4018 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 4019 def _parse_open_json(self) -> exp.OpenJSON: 4020 this = self._parse_bitwise() 4021 path = self._match(TokenType.COMMA) and self._parse_string() 4022 4023 def 
_parse_open_json_column_def() -> exp.OpenJSONColumnDef: 4024 this = self._parse_field(any_token=True) 4025 kind = self._parse_types() 4026 path = self._parse_string() 4027 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 4028 4029 return self.expression( 4030 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 4031 ) 4032 4033 expressions = None 4034 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 4035 self._match_l_paren() 4036 expressions = self._parse_csv(_parse_open_json_column_def) 4037 4038 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 4039 4040 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 4041 args = self._parse_csv(self._parse_bitwise) 4042 4043 if self._match(TokenType.IN): 4044 return self.expression( 4045 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 4046 ) 4047 4048 if haystack_first: 4049 haystack = seq_get(args, 0) 4050 needle = seq_get(args, 1) 4051 else: 4052 needle = seq_get(args, 0) 4053 haystack = seq_get(args, 1) 4054 4055 return self.expression( 4056 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 4057 ) 4058 4059 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 4060 args = self._parse_csv(self._parse_table) 4061 return exp.JoinHint(this=func_name.upper(), expressions=args) 4062 4063 def _parse_substring(self) -> exp.Substring: 4064 # Postgres supports the form: substring(string [from int] [for int]) 4065 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 4066 4067 args = self._parse_csv(self._parse_bitwise) 4068 4069 if self._match(TokenType.FROM): 4070 args.append(self._parse_bitwise()) 4071 if self._match(TokenType.FOR): 4072 args.append(self._parse_bitwise()) 4073 4074 return self.validate_expression(exp.Substring.from_arg_list(args), args) 4075 4076 def _parse_trim(self) -> exp.Trim: 4077 # https://www.w3resource.com/sql/character-functions/trim.php 4078 
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form (e.g. TRIM(x FROM y)): keep the first expression separately.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one named window definition: name AS (window spec)."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls / RespectNulls if the modifier is present."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes (FILTER, WITHIN GROUP, OVER ...) applied to `this`.

        Args:
            this: The expression the window modifiers apply to.
            alias: When True, parse a named window definition (name AS (...)).
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (a reference to a named window).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one side of a window frame: its value and optional side keyword."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an [AS] alias (or parenthesized alias list) following `this`.

        Args:
            this: The expression being aliased.
            explicit: When True, only accept aliases introduced by the AS keyword.
        """
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like token into an exp.Identifier."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return
exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 4266 4267 def _parse_number(self) -> t.Optional[exp.Expression]: 4268 if self._match(TokenType.NUMBER): 4269 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 4270 return self._parse_placeholder() 4271 4272 def _parse_identifier(self) -> t.Optional[exp.Expression]: 4273 if self._match(TokenType.IDENTIFIER): 4274 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 4275 return self._parse_placeholder() 4276 4277 def _parse_var( 4278 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 4279 ) -> t.Optional[exp.Expression]: 4280 if ( 4281 (any_token and self._advance_any()) 4282 or self._match(TokenType.VAR) 4283 or (self._match_set(tokens) if tokens else False) 4284 ): 4285 return self.expression(exp.Var, this=self._prev.text) 4286 return self._parse_placeholder() 4287 4288 def _advance_any(self) -> t.Optional[Token]: 4289 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4290 self._advance() 4291 return self._prev 4292 return None 4293 4294 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4295 return self._parse_var() or self._parse_string() 4296 4297 def _parse_null(self) -> t.Optional[exp.Expression]: 4298 if self._match(TokenType.NULL): 4299 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4300 return None 4301 4302 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4303 if self._match(TokenType.TRUE): 4304 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4305 if self._match(TokenType.FALSE): 4306 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4307 return None 4308 4309 def _parse_star(self) -> t.Optional[exp.Expression]: 4310 if self._match(TokenType.STAR): 4311 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4312 return None 4313 4314 def _parse_parameter(self) -> exp.Parameter: 4315 wrapped = self._match(TokenType.L_BRACE) 
4316 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4317 self._match(TokenType.R_BRACE) 4318 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4319 4320 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4321 if self._match_set(self.PLACEHOLDER_PARSERS): 4322 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4323 if placeholder: 4324 return placeholder 4325 self._advance(-1) 4326 return None 4327 4328 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4329 if not self._match(TokenType.EXCEPT): 4330 return None 4331 if self._match(TokenType.L_PAREN, advance=False): 4332 return self._parse_wrapped_csv(self._parse_column) 4333 return self._parse_csv(self._parse_column) 4334 4335 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4336 if not self._match(TokenType.REPLACE): 4337 return None 4338 if self._match(TokenType.L_PAREN, advance=False): 4339 return self._parse_wrapped_csv(self._parse_expression) 4340 return self._parse_expressions() 4341 4342 def _parse_csv( 4343 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4344 ) -> t.List[t.Optional[exp.Expression]]: 4345 parse_result = parse_method() 4346 items = [parse_result] if parse_result is not None else [] 4347 4348 while self._match(sep): 4349 self._add_comments(parse_result) 4350 parse_result = parse_method() 4351 if parse_result is not None: 4352 items.append(parse_result) 4353 4354 return items 4355 4356 def _parse_tokens( 4357 self, parse_method: t.Callable, expressions: t.Dict 4358 ) -> t.Optional[exp.Expression]: 4359 this = parse_method() 4360 4361 while self._match_set(expressions): 4362 this = self.expression( 4363 expressions[self._prev.token_type], 4364 this=this, 4365 comments=self._prev_comments, 4366 expression=parse_method(), 4367 ) 4368 4369 return this 4370 4371 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4372 
return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4373 4374 def _parse_wrapped_csv( 4375 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4376 ) -> t.List[t.Optional[exp.Expression]]: 4377 return self._parse_wrapped( 4378 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4379 ) 4380 4381 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4382 wrapped = self._match(TokenType.L_PAREN) 4383 if not wrapped and not optional: 4384 self.raise_error("Expecting (") 4385 parse_result = parse_method() 4386 if wrapped: 4387 self._match_r_paren() 4388 return parse_result 4389 4390 def _parse_expressions(self) -> t.List[t.Optional[exp.Expression]]: 4391 return self._parse_csv(self._parse_expression) 4392 4393 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 4394 return self._parse_select() or self._parse_set_operations( 4395 self._parse_expression() if alias else self._parse_conjunction() 4396 ) 4397 4398 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4399 return self._parse_query_modifiers( 4400 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 4401 ) 4402 4403 def _parse_transaction(self) -> exp.Transaction | exp.Command: 4404 this = None 4405 if self._match_texts(self.TRANSACTION_KIND): 4406 this = self._prev.text 4407 4408 self._match_texts({"TRANSACTION", "WORK"}) 4409 4410 modes = [] 4411 while True: 4412 mode = [] 4413 while self._match(TokenType.VAR): 4414 mode.append(self._prev.text) 4415 4416 if mode: 4417 modes.append(" ".join(mode)) 4418 if not self._match(TokenType.COMMA): 4419 break 4420 4421 return self.expression(exp.Transaction, this=this, modes=modes) 4422 4423 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 4424 chain = None 4425 savepoint = None 4426 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4427 4428 
self._match_texts({"TRANSACTION", "WORK"}) 4429 4430 if self._match_text_seq("TO"): 4431 self._match_text_seq("SAVEPOINT") 4432 savepoint = self._parse_id_var() 4433 4434 if self._match(TokenType.AND): 4435 chain = not self._match_text_seq("NO") 4436 self._match_text_seq("CHAIN") 4437 4438 if is_rollback: 4439 return self.expression(exp.Rollback, savepoint=savepoint) 4440 4441 return self.expression(exp.Commit, chain=chain) 4442 4443 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4444 if not self._match_text_seq("ADD"): 4445 return None 4446 4447 self._match(TokenType.COLUMN) 4448 exists_column = self._parse_exists(not_=True) 4449 expression = self._parse_column_def(self._parse_field(any_token=True)) 4450 4451 if expression: 4452 expression.set("exists", exists_column) 4453 4454 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 4455 if self._match_texts(("FIRST", "AFTER")): 4456 position = self._prev.text 4457 column_position = self.expression( 4458 exp.ColumnPosition, this=self._parse_column(), position=position 4459 ) 4460 expression.set("position", column_position) 4461 4462 return expression 4463 4464 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 4465 drop = self._match(TokenType.DROP) and self._parse_drop() 4466 if drop and not isinstance(drop, exp.Command): 4467 drop.set("kind", drop.args.get("kind", "COLUMN")) 4468 return drop 4469 4470 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4471 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 4472 return self.expression( 4473 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4474 ) 4475 4476 def _parse_add_constraint(self) -> exp.AddConstraint: 4477 this = None 4478 kind = self._prev.token_type 4479 4480 if kind == TokenType.CONSTRAINT: 4481 this = self._parse_id_var() 4482 4483 if self._match_text_seq("CHECK"): 4484 expression = 
self._parse_wrapped(self._parse_conjunction) 4485 enforced = self._match_text_seq("ENFORCED") 4486 4487 return self.expression( 4488 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4489 ) 4490 4491 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4492 expression = self._parse_foreign_key() 4493 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4494 expression = self._parse_primary_key() 4495 else: 4496 expression = None 4497 4498 return self.expression(exp.AddConstraint, this=this, expression=expression) 4499 4500 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4501 index = self._index - 1 4502 4503 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4504 return self._parse_csv(self._parse_add_constraint) 4505 4506 self._retreat(index) 4507 return self._parse_csv(self._parse_add_column) 4508 4509 def _parse_alter_table_alter(self) -> exp.AlterColumn: 4510 self._match(TokenType.COLUMN) 4511 column = self._parse_field(any_token=True) 4512 4513 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4514 return self.expression(exp.AlterColumn, this=column, drop=True) 4515 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4516 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4517 4518 self._match_text_seq("SET", "DATA") 4519 return self.expression( 4520 exp.AlterColumn, 4521 this=column, 4522 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4523 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4524 using=self._match(TokenType.USING) and self._parse_conjunction(), 4525 ) 4526 4527 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4528 index = self._index - 1 4529 4530 partition_exists = self._parse_exists() 4531 if self._match(TokenType.PARTITION, advance=False): 4532 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4533 4534 self._retreat(index) 4535 
return self._parse_csv(self._parse_drop_column) 4536 4537 def _parse_alter_table_rename(self) -> exp.RenameTable: 4538 self._match_text_seq("TO") 4539 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4540 4541 def _parse_alter(self) -> exp.AlterTable | exp.Command: 4542 start = self._prev 4543 4544 if not self._match(TokenType.TABLE): 4545 return self._parse_as_command(start) 4546 4547 exists = self._parse_exists() 4548 this = self._parse_table(schema=True) 4549 4550 if self._next: 4551 self._advance() 4552 4553 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4554 if parser: 4555 actions = ensure_list(parser(self)) 4556 4557 if not self._curr: 4558 return self.expression( 4559 exp.AlterTable, 4560 this=this, 4561 exists=exists, 4562 actions=actions, 4563 ) 4564 return self._parse_as_command(start) 4565 4566 def _parse_merge(self) -> exp.Merge: 4567 self._match(TokenType.INTO) 4568 target = self._parse_table() 4569 4570 self._match(TokenType.USING) 4571 using = self._parse_table() 4572 4573 self._match(TokenType.ON) 4574 on = self._parse_conjunction() 4575 4576 whens = [] 4577 while self._match(TokenType.WHEN): 4578 matched = not self._match(TokenType.NOT) 4579 self._match_text_seq("MATCHED") 4580 source = ( 4581 False 4582 if self._match_text_seq("BY", "TARGET") 4583 else self._match_text_seq("BY", "SOURCE") 4584 ) 4585 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4586 4587 self._match(TokenType.THEN) 4588 4589 if self._match(TokenType.INSERT): 4590 _this = self._parse_star() 4591 if _this: 4592 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4593 else: 4594 then = self.expression( 4595 exp.Insert, 4596 this=self._parse_value(), 4597 expression=self._match(TokenType.VALUES) and self._parse_value(), 4598 ) 4599 elif self._match(TokenType.UPDATE): 4600 expressions = self._parse_star() 4601 if expressions: 4602 then = self.expression(exp.Update, 
expressions=expressions) 4603 else: 4604 then = self.expression( 4605 exp.Update, 4606 expressions=self._match(TokenType.SET) 4607 and self._parse_csv(self._parse_equality), 4608 ) 4609 elif self._match(TokenType.DELETE): 4610 then = self.expression(exp.Var, this=self._prev.text) 4611 else: 4612 then = None 4613 4614 whens.append( 4615 self.expression( 4616 exp.When, 4617 matched=matched, 4618 source=source, 4619 condition=condition, 4620 then=then, 4621 ) 4622 ) 4623 4624 return self.expression( 4625 exp.Merge, 4626 this=target, 4627 using=using, 4628 on=on, 4629 expressions=whens, 4630 ) 4631 4632 def _parse_show(self) -> t.Optional[exp.Expression]: 4633 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 4634 if parser: 4635 return parser(self) 4636 self._advance() 4637 return self.expression(exp.Show, this=self._prev.text.upper()) 4638 4639 def _parse_set_item_assignment( 4640 self, kind: t.Optional[str] = None 4641 ) -> t.Optional[exp.Expression]: 4642 index = self._index 4643 4644 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4645 return self._parse_set_transaction(global_=kind == "GLOBAL") 4646 4647 left = self._parse_primary() or self._parse_id_var() 4648 4649 if not self._match_texts(("=", "TO")): 4650 self._retreat(index) 4651 return None 4652 4653 right = self._parse_statement() or self._parse_id_var() 4654 this = self.expression(exp.EQ, this=left, expression=right) 4655 4656 return self.expression(exp.SetItem, this=this, kind=kind) 4657 4658 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4659 self._match_text_seq("TRANSACTION") 4660 characteristics = self._parse_csv( 4661 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4662 ) 4663 return self.expression( 4664 exp.SetItem, 4665 expressions=characteristics, 4666 kind="TRANSACTION", 4667 **{"global": global_}, # type: ignore 4668 ) 4669 4670 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4671 parser = 
self._find_parser(self.SET_PARSERS, self.SET_TRIE) 4672 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4673 4674 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 4675 index = self._index 4676 set_ = self.expression( 4677 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 4678 ) 4679 4680 if self._curr: 4681 self._retreat(index) 4682 return self._parse_as_command(self._prev) 4683 4684 return set_ 4685 4686 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Var]: 4687 for option in options: 4688 if self._match_text_seq(*option.split(" ")): 4689 return exp.var(option) 4690 return None 4691 4692 def _parse_as_command(self, start: Token) -> exp.Command: 4693 while self._curr: 4694 self._advance() 4695 text = self._find_sql(start, self._prev) 4696 size = len(start.text) 4697 return exp.Command(this=text[:size], expression=text[size:]) 4698 4699 def _parse_dict_property(self, this: str) -> exp.DictProperty: 4700 settings = [] 4701 4702 self._match_l_paren() 4703 kind = self._parse_id_var() 4704 4705 if self._match(TokenType.L_PAREN): 4706 while True: 4707 key = self._parse_id_var() 4708 value = self._parse_primary() 4709 4710 if not key and value is None: 4711 break 4712 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 4713 self._match(TokenType.R_PAREN) 4714 4715 self._match_r_paren() 4716 4717 return self.expression( 4718 exp.DictProperty, 4719 this=this, 4720 kind=kind.this if kind else None, 4721 settings=settings, 4722 ) 4723 4724 def _parse_dict_range(self, this: str) -> exp.DictRange: 4725 self._match_l_paren() 4726 has_min = self._match_text_seq("MIN") 4727 if has_min: 4728 min = self._parse_var() or self._parse_primary() 4729 self._match_text_seq("MAX") 4730 max = self._parse_var() or self._parse_primary() 4731 else: 4732 max = self._parse_var() or self._parse_primary() 4733 min = exp.Literal.number(0) 4734 
self._match_r_paren() 4735 return self.expression(exp.DictRange, this=this, min=min, max=max) 4736 4737 def _find_parser( 4738 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4739 ) -> t.Optional[t.Callable]: 4740 if not self._curr: 4741 return None 4742 4743 index = self._index 4744 this = [] 4745 while True: 4746 # The current token might be multiple words 4747 curr = self._curr.text.upper() 4748 key = curr.split(" ") 4749 this.append(curr) 4750 4751 self._advance() 4752 result, trie = in_trie(trie, key) 4753 if result == TrieResult.FAILED: 4754 break 4755 4756 if result == TrieResult.EXISTS: 4757 subparser = parsers[" ".join(this)] 4758 return subparser 4759 4760 self._retreat(index) 4761 return None 4762 4763 def _match(self, token_type, advance=True, expression=None): 4764 if not self._curr: 4765 return None 4766 4767 if self._curr.token_type == token_type: 4768 if advance: 4769 self._advance() 4770 self._add_comments(expression) 4771 return True 4772 4773 return None 4774 4775 def _match_set(self, types, advance=True): 4776 if not self._curr: 4777 return None 4778 4779 if self._curr.token_type in types: 4780 if advance: 4781 self._advance() 4782 return True 4783 4784 return None 4785 4786 def _match_pair(self, token_type_a, token_type_b, advance=True): 4787 if not self._curr or not self._next: 4788 return None 4789 4790 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4791 if advance: 4792 self._advance(2) 4793 return True 4794 4795 return None 4796 4797 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4798 if not self._match(TokenType.L_PAREN, expression=expression): 4799 self.raise_error("Expecting (") 4800 4801 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 4802 if not self._match(TokenType.R_PAREN, expression=expression): 4803 self.raise_error("Expecting )") 4804 4805 def _match_texts(self, texts, advance=True): 4806 if self._curr and 
self._curr.text.upper() in texts: 4807 if advance: 4808 self._advance() 4809 return True 4810 return False 4811 4812 def _match_text_seq(self, *texts, advance=True): 4813 index = self._index 4814 for text in texts: 4815 if self._curr and self._curr.text.upper() == text: 4816 self._advance() 4817 else: 4818 self._retreat(index) 4819 return False 4820 4821 if not advance: 4822 self._retreat(index) 4823 4824 return True 4825 4826 @t.overload 4827 def _replace_columns_with_dots(self, this: exp.Expression) -> exp.Expression: 4828 ... 4829 4830 @t.overload 4831 def _replace_columns_with_dots( 4832 self, this: t.Optional[exp.Expression] 4833 ) -> t.Optional[exp.Expression]: 4834 ... 4835 4836 def _replace_columns_with_dots(self, this): 4837 if isinstance(this, exp.Dot): 4838 exp.replace_children(this, self._replace_columns_with_dots) 4839 elif isinstance(this, exp.Column): 4840 exp.replace_children(this, self._replace_columns_with_dots) 4841 table = this.args.get("table") 4842 this = ( 4843 self.expression(exp.Dot, this=table, expression=this.this) if table else this.this 4844 ) 4845 4846 return this 4847 4848 def _replace_lambda( 4849 self, node: t.Optional[exp.Expression], lambda_variables: t.Set[str] 4850 ) -> t.Optional[exp.Expression]: 4851 if not node: 4852 return node 4853 4854 for column in node.find_all(exp.Column): 4855 if column.parts[0].name in lambda_variables: 4856 dot_or_id = column.to_dot() if column.table else column.this 4857 parent = column.parent 4858 4859 while isinstance(parent, exp.Dot): 4860 if not isinstance(parent.parent, exp.Dot): 4861 parent.replace(dot_or_id) 4862 break 4863 parent = parent.parent 4864 else: 4865 if column is node: 4866 node = dot_or_id 4867 else: 4868 column.replace(dot_or_id) 4869 return node
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
846 def __init__( 847 self, 848 error_level: t.Optional[ErrorLevel] = None, 849 error_message_context: int = 100, 850 max_errors: int = 3, 851 ): 852 self.error_level = error_level or ErrorLevel.IMMEDIATE 853 self.error_message_context = error_message_context 854 self.max_errors = max_errors 855 self.reset()
867 def parse( 868 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 869 ) -> t.List[t.Optional[exp.Expression]]: 870 """ 871 Parses a list of tokens and returns a list of syntax trees, one tree 872 per parsed SQL statement. 873 874 Args: 875 raw_tokens: The list of tokens. 876 sql: The original SQL string, used to produce helpful debug messages. 877 878 Returns: 879 The list of the produced syntax trees. 880 """ 881 return self._parse( 882 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 883 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of produced syntax trees, one per parsed statement.
885 def parse_into( 886 self, 887 expression_types: exp.IntoType, 888 raw_tokens: t.List[Token], 889 sql: t.Optional[str] = None, 890 ) -> t.List[t.Optional[exp.Expression]]: 891 """ 892 Parses a list of tokens into a given Expression type. If a collection of Expression 893 types is given instead, this method will try to parse the token list into each one 894 of them, stopping at the first for which the parsing succeeds. 895 896 Args: 897 expression_types: The expression type(s) to try and parse the token list into. 898 raw_tokens: The list of tokens. 899 sql: The original SQL string, used to produce helpful debug messages. 900 901 Returns: 902 The target Expression. 903 """ 904 errors = [] 905 for expression_type in ensure_list(expression_types): 906 parser = self.EXPRESSION_PARSERS.get(expression_type) 907 if not parser: 908 raise TypeError(f"No parser registered for {expression_type}") 909 910 try: 911 return self._parse(parser, raw_tokens, sql) 912 except ParseError as e: 913 e.errors[0]["into_expression"] = expression_type 914 errors.append(e) 915 916 raise ParseError( 917 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 918 errors=merge_errors(errors), 919 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
956 def check_errors(self) -> None: 957 """Logs or raises any found errors, depending on the chosen error level setting.""" 958 if self.error_level == ErrorLevel.WARN: 959 for error in self.errors: 960 logger.error(str(error)) 961 elif self.error_level == ErrorLevel.RAISE and self.errors: 962 raise ParseError( 963 concat_messages(self.errors, self.max_errors), 964 errors=merge_errors(self.errors), 965 )
Logs or raises any found errors, depending on the chosen error level setting.
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        # Anchor the error to the offending token, falling back to the cursor,
        # the previous token, and finally an empty token.
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1  # include the token's final character in the highlight
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        # \033[4m / \033[0m underline the offending SQL span in ANSI terminals.
        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        # WARN/RAISE levels accumulate; check_errors decides what to do with them.
        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
995 def expression( 996 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 997 ) -> E: 998 """ 999 Creates a new, validated Expression. 1000 1001 Args: 1002 exp_class: The expression class to instantiate. 1003 comments: An optional list of comments to attach to the expression. 1004 kwargs: The arguments to set for the expression along with their respective values. 1005 1006 Returns: 1007 The target expression. 1008 """ 1009 instance = exp_class(**kwargs) 1010 instance.add_comments(comments) if comments else self._add_comments(instance) 1011 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1018 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1019 """ 1020 Validates an Expression, making sure that all its mandatory arguments are set. 1021 1022 Args: 1023 expression: The expression to validate. 1024 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1025 1026 Returns: 1027 The validated expression. 1028 """ 1029 if self.error_level != ErrorLevel.IGNORE: 1030 for error_message in expression.error_messages(args): 1031 self.raise_error(error_message) 1032 1033 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.