# sqlglot.parser
1from __future__ import annotations 2 3import logging 4import typing as t 5from collections import defaultdict 6 7from sqlglot import exp 8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors 9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get 10from sqlglot.tokens import Token, Tokenizer, TokenType 11from sqlglot.trie import in_trie, new_trie 12 13logger = logging.getLogger("sqlglot") 14 15E = t.TypeVar("E", bound=exp.Expression) 16 17 18def parse_var_map(args: t.Sequence) -> exp.Expression: 19 if len(args) == 1 and args[0].is_star: 20 return exp.StarMap(this=args[0]) 21 22 keys = [] 23 values = [] 24 for i in range(0, len(args), 2): 25 keys.append(args[i]) 26 values.append(args[i + 1]) 27 return exp.VarMap( 28 keys=exp.Array(expressions=keys), 29 values=exp.Array(expressions=values), 30 ) 31 32 33def parse_like(args): 34 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 35 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 36 37 38def binary_range_parser( 39 expr_type: t.Type[exp.Expression], 40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 41 return lambda self, this: self._parse_escape( 42 self.expression(expr_type, this=this, expression=self._parse_bitwise()) 43 ) 44 45 46class _Parser(type): 47 def __new__(cls, clsname, bases, attrs): 48 klass = super().__new__(cls, clsname, bases, attrs) 49 klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 50 klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS) 51 52 return klass 53 54 55class Parser(metaclass=_Parser): 56 """ 57 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 58 a parsed syntax tree. 59 60 Args: 61 error_level: the desired error level. 
62 Default: ErrorLevel.RAISE 63 error_message_context: determines the amount of context to capture from a 64 query string when displaying the error message (in number of characters). 65 Default: 50. 66 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 67 Default: 0 68 alias_post_tablesample: If the table alias comes after tablesample. 69 Default: False 70 max_errors: Maximum number of error messages to include in a raised ParseError. 71 This is only relevant if error_level is ErrorLevel.RAISE. 72 Default: 3 73 null_ordering: Indicates the default null ordering method to use if not explicitly set. 74 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 75 Default: "nulls_are_small" 76 """ 77 78 FUNCTIONS: t.Dict[str, t.Callable] = { 79 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 80 "DATE_TO_DATE_STR": lambda args: exp.Cast( 81 this=seq_get(args, 0), 82 to=exp.DataType(this=exp.DataType.Type.TEXT), 83 ), 84 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 85 "IFNULL": exp.Coalesce.from_arg_list, 86 "LIKE": parse_like, 87 "TIME_TO_TIME_STR": lambda args: exp.Cast( 88 this=seq_get(args, 0), 89 to=exp.DataType(this=exp.DataType.Type.TEXT), 90 ), 91 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 92 this=exp.Cast( 93 this=seq_get(args, 0), 94 to=exp.DataType(this=exp.DataType.Type.TEXT), 95 ), 96 start=exp.Literal.number(1), 97 length=exp.Literal.number(10), 98 ), 99 "VAR_MAP": parse_var_map, 100 } 101 102 NO_PAREN_FUNCTIONS = { 103 TokenType.CURRENT_DATE: exp.CurrentDate, 104 TokenType.CURRENT_DATETIME: exp.CurrentDate, 105 TokenType.CURRENT_TIME: exp.CurrentTime, 106 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 107 TokenType.CURRENT_USER: exp.CurrentUser, 108 } 109 110 JOIN_HINTS: t.Set[str] = set() 111 112 NESTED_TYPE_TOKENS = { 113 TokenType.ARRAY, 114 TokenType.MAP, 115 TokenType.NULLABLE, 116 TokenType.STRUCT, 117 } 118 119 TYPE_TOKENS = 
{ 120 TokenType.BIT, 121 TokenType.BOOLEAN, 122 TokenType.TINYINT, 123 TokenType.UTINYINT, 124 TokenType.SMALLINT, 125 TokenType.USMALLINT, 126 TokenType.INT, 127 TokenType.UINT, 128 TokenType.BIGINT, 129 TokenType.UBIGINT, 130 TokenType.INT128, 131 TokenType.UINT128, 132 TokenType.INT256, 133 TokenType.UINT256, 134 TokenType.FLOAT, 135 TokenType.DOUBLE, 136 TokenType.CHAR, 137 TokenType.NCHAR, 138 TokenType.VARCHAR, 139 TokenType.NVARCHAR, 140 TokenType.TEXT, 141 TokenType.MEDIUMTEXT, 142 TokenType.LONGTEXT, 143 TokenType.MEDIUMBLOB, 144 TokenType.LONGBLOB, 145 TokenType.BINARY, 146 TokenType.VARBINARY, 147 TokenType.JSON, 148 TokenType.JSONB, 149 TokenType.INTERVAL, 150 TokenType.TIME, 151 TokenType.TIMESTAMP, 152 TokenType.TIMESTAMPTZ, 153 TokenType.TIMESTAMPLTZ, 154 TokenType.DATETIME, 155 TokenType.DATETIME64, 156 TokenType.DATE, 157 TokenType.DECIMAL, 158 TokenType.BIGDECIMAL, 159 TokenType.UUID, 160 TokenType.GEOGRAPHY, 161 TokenType.GEOMETRY, 162 TokenType.HLLSKETCH, 163 TokenType.HSTORE, 164 TokenType.PSEUDO_TYPE, 165 TokenType.SUPER, 166 TokenType.SERIAL, 167 TokenType.SMALLSERIAL, 168 TokenType.BIGSERIAL, 169 TokenType.XML, 170 TokenType.UNIQUEIDENTIFIER, 171 TokenType.MONEY, 172 TokenType.SMALLMONEY, 173 TokenType.ROWVERSION, 174 TokenType.IMAGE, 175 TokenType.VARIANT, 176 TokenType.OBJECT, 177 TokenType.INET, 178 *NESTED_TYPE_TOKENS, 179 } 180 181 SUBQUERY_PREDICATES = { 182 TokenType.ANY: exp.Any, 183 TokenType.ALL: exp.All, 184 TokenType.EXISTS: exp.Exists, 185 TokenType.SOME: exp.Any, 186 } 187 188 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 189 190 DB_CREATABLES = { 191 TokenType.DATABASE, 192 TokenType.SCHEMA, 193 TokenType.TABLE, 194 TokenType.VIEW, 195 } 196 197 CREATABLES = { 198 TokenType.COLUMN, 199 TokenType.FUNCTION, 200 TokenType.INDEX, 201 TokenType.PROCEDURE, 202 *DB_CREATABLES, 203 } 204 205 ID_VAR_TOKENS = { 206 TokenType.VAR, 207 TokenType.ANTI, 208 TokenType.APPLY, 209 TokenType.AUTO_INCREMENT, 210 
TokenType.BEGIN, 211 TokenType.BOTH, 212 TokenType.BUCKET, 213 TokenType.CACHE, 214 TokenType.CASCADE, 215 TokenType.COLLATE, 216 TokenType.COMMAND, 217 TokenType.COMMENT, 218 TokenType.COMMIT, 219 TokenType.COMPOUND, 220 TokenType.CONSTRAINT, 221 TokenType.DEFAULT, 222 TokenType.DELETE, 223 TokenType.DESCRIBE, 224 TokenType.DIV, 225 TokenType.END, 226 TokenType.EXECUTE, 227 TokenType.ESCAPE, 228 TokenType.FALSE, 229 TokenType.FIRST, 230 TokenType.FILTER, 231 TokenType.FOLLOWING, 232 TokenType.FORMAT, 233 TokenType.FULL, 234 TokenType.IF, 235 TokenType.IS, 236 TokenType.ISNULL, 237 TokenType.INTERVAL, 238 TokenType.KEEP, 239 TokenType.LAZY, 240 TokenType.LEADING, 241 TokenType.LEFT, 242 TokenType.LOCAL, 243 TokenType.MATERIALIZED, 244 TokenType.MERGE, 245 TokenType.NATURAL, 246 TokenType.NEXT, 247 TokenType.OFFSET, 248 TokenType.ONLY, 249 TokenType.OPTIONS, 250 TokenType.ORDINALITY, 251 TokenType.OVERWRITE, 252 TokenType.PARTITION, 253 TokenType.PERCENT, 254 TokenType.PIVOT, 255 TokenType.PRAGMA, 256 TokenType.PRECEDING, 257 TokenType.RANGE, 258 TokenType.REFERENCES, 259 TokenType.RIGHT, 260 TokenType.ROW, 261 TokenType.ROWS, 262 TokenType.SEED, 263 TokenType.SEMI, 264 TokenType.SET, 265 TokenType.SETTINGS, 266 TokenType.SHOW, 267 TokenType.SORTKEY, 268 TokenType.TEMPORARY, 269 TokenType.TOP, 270 TokenType.TRAILING, 271 TokenType.TRUE, 272 TokenType.UNBOUNDED, 273 TokenType.UNIQUE, 274 TokenType.UNLOGGED, 275 TokenType.UNPIVOT, 276 TokenType.VOLATILE, 277 TokenType.WINDOW, 278 *CREATABLES, 279 *SUBQUERY_PREDICATES, 280 *TYPE_TOKENS, 281 *NO_PAREN_FUNCTIONS, 282 } 283 284 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 285 286 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 287 TokenType.APPLY, 288 TokenType.FULL, 289 TokenType.LEFT, 290 TokenType.NATURAL, 291 TokenType.OFFSET, 292 TokenType.RIGHT, 293 TokenType.WINDOW, 294 } 295 296 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 297 298 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 299 300 
TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 301 302 FUNC_TOKENS = { 303 TokenType.COMMAND, 304 TokenType.CURRENT_DATE, 305 TokenType.CURRENT_DATETIME, 306 TokenType.CURRENT_TIMESTAMP, 307 TokenType.CURRENT_TIME, 308 TokenType.CURRENT_USER, 309 TokenType.FILTER, 310 TokenType.FIRST, 311 TokenType.FORMAT, 312 TokenType.GLOB, 313 TokenType.IDENTIFIER, 314 TokenType.INDEX, 315 TokenType.ISNULL, 316 TokenType.ILIKE, 317 TokenType.LIKE, 318 TokenType.MERGE, 319 TokenType.OFFSET, 320 TokenType.PRIMARY_KEY, 321 TokenType.RANGE, 322 TokenType.REPLACE, 323 TokenType.ROW, 324 TokenType.UNNEST, 325 TokenType.VAR, 326 TokenType.LEFT, 327 TokenType.RIGHT, 328 TokenType.DATE, 329 TokenType.DATETIME, 330 TokenType.TABLE, 331 TokenType.TIMESTAMP, 332 TokenType.TIMESTAMPTZ, 333 TokenType.WINDOW, 334 *TYPE_TOKENS, 335 *SUBQUERY_PREDICATES, 336 } 337 338 CONJUNCTION = { 339 TokenType.AND: exp.And, 340 TokenType.OR: exp.Or, 341 } 342 343 EQUALITY = { 344 TokenType.EQ: exp.EQ, 345 TokenType.NEQ: exp.NEQ, 346 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 347 } 348 349 COMPARISON = { 350 TokenType.GT: exp.GT, 351 TokenType.GTE: exp.GTE, 352 TokenType.LT: exp.LT, 353 TokenType.LTE: exp.LTE, 354 } 355 356 BITWISE = { 357 TokenType.AMP: exp.BitwiseAnd, 358 TokenType.CARET: exp.BitwiseXor, 359 TokenType.PIPE: exp.BitwiseOr, 360 TokenType.DPIPE: exp.DPipe, 361 } 362 363 TERM = { 364 TokenType.DASH: exp.Sub, 365 TokenType.PLUS: exp.Add, 366 TokenType.MOD: exp.Mod, 367 TokenType.COLLATE: exp.Collate, 368 } 369 370 FACTOR = { 371 TokenType.DIV: exp.IntDiv, 372 TokenType.LR_ARROW: exp.Distance, 373 TokenType.SLASH: exp.Div, 374 TokenType.STAR: exp.Mul, 375 } 376 377 TIMESTAMPS = { 378 TokenType.TIME, 379 TokenType.TIMESTAMP, 380 TokenType.TIMESTAMPTZ, 381 TokenType.TIMESTAMPLTZ, 382 } 383 384 SET_OPERATIONS = { 385 TokenType.UNION, 386 TokenType.INTERSECT, 387 TokenType.EXCEPT, 388 } 389 390 JOIN_SIDES = { 391 TokenType.LEFT, 392 TokenType.RIGHT, 393 TokenType.FULL, 394 } 395 396 
JOIN_KINDS = { 397 TokenType.INNER, 398 TokenType.OUTER, 399 TokenType.CROSS, 400 TokenType.SEMI, 401 TokenType.ANTI, 402 } 403 404 LAMBDAS = { 405 TokenType.ARROW: lambda self, expressions: self.expression( 406 exp.Lambda, 407 this=self._replace_lambda( 408 self._parse_conjunction(), 409 {node.name for node in expressions}, 410 ), 411 expressions=expressions, 412 ), 413 TokenType.FARROW: lambda self, expressions: self.expression( 414 exp.Kwarg, 415 this=exp.Var(this=expressions[0].name), 416 expression=self._parse_conjunction(), 417 ), 418 } 419 420 COLUMN_OPERATORS = { 421 TokenType.DOT: None, 422 TokenType.DCOLON: lambda self, this, to: self.expression( 423 exp.Cast if self.STRICT_CAST else exp.TryCast, 424 this=this, 425 to=to, 426 ), 427 TokenType.ARROW: lambda self, this, path: self.expression( 428 exp.JSONExtract, 429 this=this, 430 expression=path, 431 ), 432 TokenType.DARROW: lambda self, this, path: self.expression( 433 exp.JSONExtractScalar, 434 this=this, 435 expression=path, 436 ), 437 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 438 exp.JSONBExtract, 439 this=this, 440 expression=path, 441 ), 442 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 443 exp.JSONBExtractScalar, 444 this=this, 445 expression=path, 446 ), 447 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 448 exp.JSONBContains, 449 this=this, 450 expression=key, 451 ), 452 } 453 454 EXPRESSION_PARSERS = { 455 exp.Column: lambda self: self._parse_column(), 456 exp.DataType: lambda self: self._parse_types(), 457 exp.From: lambda self: self._parse_from(), 458 exp.Group: lambda self: self._parse_group(), 459 exp.Identifier: lambda self: self._parse_id_var(), 460 exp.Lateral: lambda self: self._parse_lateral(), 461 exp.Join: lambda self: self._parse_join(), 462 exp.Order: lambda self: self._parse_order(), 463 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 464 exp.Sort: lambda self: 
self._parse_sort(TokenType.SORT_BY, exp.Sort), 465 exp.Lambda: lambda self: self._parse_lambda(), 466 exp.Limit: lambda self: self._parse_limit(), 467 exp.Offset: lambda self: self._parse_offset(), 468 exp.TableAlias: lambda self: self._parse_table_alias(), 469 exp.Table: lambda self: self._parse_table(), 470 exp.Condition: lambda self: self._parse_conjunction(), 471 exp.Expression: lambda self: self._parse_statement(), 472 exp.Properties: lambda self: self._parse_properties(), 473 exp.Where: lambda self: self._parse_where(), 474 exp.Ordered: lambda self: self._parse_ordered(), 475 exp.Having: lambda self: self._parse_having(), 476 exp.With: lambda self: self._parse_with(), 477 exp.Window: lambda self: self._parse_named_window(), 478 exp.Qualify: lambda self: self._parse_qualify(), 479 exp.Returning: lambda self: self._parse_returning(), 480 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 481 } 482 483 STATEMENT_PARSERS = { 484 TokenType.ALTER: lambda self: self._parse_alter(), 485 TokenType.BEGIN: lambda self: self._parse_transaction(), 486 TokenType.CACHE: lambda self: self._parse_cache(), 487 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 488 TokenType.COMMENT: lambda self: self._parse_comment(), 489 TokenType.CREATE: lambda self: self._parse_create(), 490 TokenType.DELETE: lambda self: self._parse_delete(), 491 TokenType.DESC: lambda self: self._parse_describe(), 492 TokenType.DESCRIBE: lambda self: self._parse_describe(), 493 TokenType.DROP: lambda self: self._parse_drop(), 494 TokenType.END: lambda self: self._parse_commit_or_rollback(), 495 TokenType.INSERT: lambda self: self._parse_insert(), 496 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 497 TokenType.MERGE: lambda self: self._parse_merge(), 498 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 499 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 500 TokenType.SET: lambda self: self._parse_set(), 501 
TokenType.UNCACHE: lambda self: self._parse_uncache(), 502 TokenType.UPDATE: lambda self: self._parse_update(), 503 TokenType.USE: lambda self: self.expression( 504 exp.Use, 505 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 506 and exp.Var(this=self._prev.text), 507 this=self._parse_table(schema=False), 508 ), 509 } 510 511 UNARY_PARSERS = { 512 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 513 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 514 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 515 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 516 } 517 518 PRIMARY_PARSERS = { 519 TokenType.STRING: lambda self, token: self.expression( 520 exp.Literal, this=token.text, is_string=True 521 ), 522 TokenType.NUMBER: lambda self, token: self.expression( 523 exp.Literal, this=token.text, is_string=False 524 ), 525 TokenType.STAR: lambda self, _: self.expression( 526 exp.Star, 527 **{"except": self._parse_except(), "replace": self._parse_replace()}, 528 ), 529 TokenType.NULL: lambda self, _: self.expression(exp.Null), 530 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 531 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 532 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 533 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 534 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 535 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 536 TokenType.NATIONAL: lambda self, token: self._parse_national(token), 537 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 538 } 539 540 PLACEHOLDER_PARSERS = { 541 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 542 
TokenType.PARAMETER: lambda self: self._parse_parameter(), 543 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 544 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 545 else None, 546 } 547 548 RANGE_PARSERS = { 549 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 550 TokenType.GLOB: binary_range_parser(exp.Glob), 551 TokenType.ILIKE: binary_range_parser(exp.ILike), 552 TokenType.IN: lambda self, this: self._parse_in(this), 553 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 554 TokenType.IS: lambda self, this: self._parse_is(this), 555 TokenType.LIKE: binary_range_parser(exp.Like), 556 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 557 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 558 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 559 } 560 561 PROPERTY_PARSERS = { 562 "AFTER": lambda self: self._parse_afterjournal( 563 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 564 ), 565 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 566 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 567 "BEFORE": lambda self: self._parse_journal( 568 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 569 ), 570 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 571 "CHARACTER SET": lambda self: self._parse_character_set(), 572 "CHECKSUM": lambda self: self._parse_checksum(), 573 "CLUSTER BY": lambda self: self.expression( 574 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 575 ), 576 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 577 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 578 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 579 default=self._prev.text.upper() == "DEFAULT" 580 ), 581 "DEFINER": lambda self: self._parse_definer(), 582 "DETERMINISTIC": lambda self: 
self.expression( 583 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 584 ), 585 "DISTKEY": lambda self: self._parse_distkey(), 586 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 587 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 588 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 589 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 590 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 591 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 592 "FREESPACE": lambda self: self._parse_freespace(), 593 "GLOBAL": lambda self: self._parse_temporary(global_=True), 594 "IMMUTABLE": lambda self: self.expression( 595 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 596 ), 597 "JOURNAL": lambda self: self._parse_journal( 598 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 599 ), 600 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 601 "LIKE": lambda self: self._parse_create_like(), 602 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 603 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 604 "LOCK": lambda self: self._parse_locking(), 605 "LOCKING": lambda self: self._parse_locking(), 606 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 607 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 608 "MAX": lambda self: self._parse_datablocksize(), 609 "MAXIMUM": lambda self: self._parse_datablocksize(), 610 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 611 no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 612 ), 613 "MIN": lambda self: self._parse_datablocksize(), 614 "MINIMUM": lambda self: self._parse_datablocksize(), 615 "MULTISET": lambda self: self.expression(exp.SetProperty, 
multi=True), 616 "NO": lambda self: self._parse_noprimaryindex(), 617 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 618 "ON": lambda self: self._parse_oncommit(), 619 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 620 "PARTITION BY": lambda self: self._parse_partitioned_by(), 621 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 622 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 623 "PRIMARY KEY": lambda self: self._parse_primary_key(), 624 "RETURNS": lambda self: self._parse_returns(), 625 "ROW": lambda self: self._parse_row(), 626 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 627 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 628 "SETTINGS": lambda self: self.expression( 629 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 630 ), 631 "SORTKEY": lambda self: self._parse_sortkey(), 632 "STABLE": lambda self: self.expression( 633 exp.StabilityProperty, this=exp.Literal.string("STABLE") 634 ), 635 "STORED": lambda self: self._parse_stored(), 636 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 637 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 638 "TEMP": lambda self: self._parse_temporary(global_=False), 639 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 640 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 641 "TTL": lambda self: self._parse_ttl(), 642 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 643 "VOLATILE": lambda self: self._parse_volatile_property(), 644 "WITH": lambda self: self._parse_with_property(), 645 } 646 647 CONSTRAINT_PARSERS = { 648 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 649 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 650 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 651 
"CHARACTER SET": lambda self: self.expression( 652 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 653 ), 654 "CHECK": lambda self: self.expression( 655 exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction) 656 ), 657 "COLLATE": lambda self: self.expression( 658 exp.CollateColumnConstraint, this=self._parse_var() 659 ), 660 "COMMENT": lambda self: self.expression( 661 exp.CommentColumnConstraint, this=self._parse_string() 662 ), 663 "COMPRESS": lambda self: self._parse_compress(), 664 "DEFAULT": lambda self: self.expression( 665 exp.DefaultColumnConstraint, this=self._parse_bitwise() 666 ), 667 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 668 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 669 "FORMAT": lambda self: self.expression( 670 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 671 ), 672 "GENERATED": lambda self: self._parse_generated_as_identity(), 673 "IDENTITY": lambda self: self._parse_auto_increment(), 674 "INLINE": lambda self: self._parse_inline(), 675 "LIKE": lambda self: self._parse_create_like(), 676 "NOT": lambda self: self._parse_not_constraint(), 677 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 678 "ON": lambda self: self._match(TokenType.UPDATE) 679 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()), 680 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 681 "PRIMARY KEY": lambda self: self._parse_primary_key(), 682 "REFERENCES": lambda self: self._parse_references(match=False), 683 "TITLE": lambda self: self.expression( 684 exp.TitleColumnConstraint, this=self._parse_var_or_string() 685 ), 686 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 687 "UNIQUE": lambda self: self._parse_unique(), 688 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 689 } 690 691 
ALTER_PARSERS = { 692 "ADD": lambda self: self._parse_alter_table_add(), 693 "ALTER": lambda self: self._parse_alter_table_alter(), 694 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 695 "DROP": lambda self: self._parse_alter_table_drop(), 696 "RENAME": lambda self: self._parse_alter_table_rename(), 697 } 698 699 SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"} 700 701 NO_PAREN_FUNCTION_PARSERS = { 702 TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 703 TokenType.CASE: lambda self: self._parse_case(), 704 TokenType.IF: lambda self: self._parse_if(), 705 TokenType.NEXT_VALUE_FOR: lambda self: self.expression( 706 exp.NextValueFor, 707 this=self._parse_column(), 708 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 709 ), 710 } 711 712 FUNCTION_PARSERS: t.Dict[str, t.Callable] = { 713 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 714 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 715 "DECODE": lambda self: self._parse_decode(), 716 "EXTRACT": lambda self: self._parse_extract(), 717 "JSON_OBJECT": lambda self: self._parse_json_object(), 718 "LOG": lambda self: self._parse_logarithm(), 719 "MATCH": lambda self: self._parse_match_against(), 720 "OPENJSON": lambda self: self._parse_open_json(), 721 "POSITION": lambda self: self._parse_position(), 722 "STRING_AGG": lambda self: self._parse_string_agg(), 723 "SUBSTRING": lambda self: self._parse_substring(), 724 "TRIM": lambda self: self._parse_trim(), 725 "TRY_CAST": lambda self: self._parse_cast(False), 726 "TRY_CONVERT": lambda self: self._parse_convert(False), 727 } 728 729 QUERY_MODIFIER_PARSERS = { 730 "joins": lambda self: list(iter(self._parse_join, None)), 731 "laterals": lambda self: list(iter(self._parse_lateral, None)), 732 "match": lambda self: self._parse_match_recognize(), 733 "where": lambda self: self._parse_where(), 734 "group": lambda self: 
self._parse_group(), 735 "having": lambda self: self._parse_having(), 736 "qualify": lambda self: self._parse_qualify(), 737 "windows": lambda self: self._parse_window_clause(), 738 "order": lambda self: self._parse_order(), 739 "limit": lambda self: self._parse_limit(), 740 "offset": lambda self: self._parse_offset(), 741 "lock": lambda self: self._parse_lock(), 742 "sample": lambda self: self._parse_table_sample(as_modifier=True), 743 } 744 745 SET_PARSERS = { 746 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 747 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 748 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 749 "TRANSACTION": lambda self: self._parse_set_transaction(), 750 } 751 752 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 753 754 TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {} 755 756 MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table) 757 758 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 759 760 TRANSACTION_CHARACTERISTICS = { 761 "ISOLATION LEVEL REPEATABLE READ", 762 "ISOLATION LEVEL READ COMMITTED", 763 "ISOLATION LEVEL READ UNCOMMITTED", 764 "ISOLATION LEVEL SERIALIZABLE", 765 "READ WRITE", 766 "READ ONLY", 767 } 768 769 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 770 771 CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"} 772 773 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 774 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 775 776 ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY} 777 778 STRICT_CAST = True 779 780 CONVERT_TYPE_FIRST = False 781 782 PREFIXED_PIVOT_COLUMNS = False 783 IDENTIFY_PIVOT_STRINGS = False 784 785 LOG_BASE_FIRST = True 786 LOG_DEFAULTS_TO_LN = False 787 788 __slots__ = ( 789 "error_level", 790 "error_message_context", 791 "sql", 792 "errors", 793 "index_offset", 794 "unnest_column_only", 795 "alias_post_tablesample", 796 "max_errors", 797 "null_ordering", 
798 "_tokens", 799 "_index", 800 "_curr", 801 "_next", 802 "_prev", 803 "_prev_comments", 804 "_show_trie", 805 "_set_trie", 806 ) 807 808 def __init__( 809 self, 810 error_level: t.Optional[ErrorLevel] = None, 811 error_message_context: int = 100, 812 index_offset: int = 0, 813 unnest_column_only: bool = False, 814 alias_post_tablesample: bool = False, 815 max_errors: int = 3, 816 null_ordering: t.Optional[str] = None, 817 ): 818 self.error_level = error_level or ErrorLevel.IMMEDIATE 819 self.error_message_context = error_message_context 820 self.index_offset = index_offset 821 self.unnest_column_only = unnest_column_only 822 self.alias_post_tablesample = alias_post_tablesample 823 self.max_errors = max_errors 824 self.null_ordering = null_ordering 825 self.reset() 826 827 def reset(self): 828 self.sql = "" 829 self.errors = [] 830 self._tokens = [] 831 self._index = 0 832 self._curr = None 833 self._next = None 834 self._prev = None 835 self._prev_comments = None 836 837 def parse( 838 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 839 ) -> t.List[t.Optional[exp.Expression]]: 840 """ 841 Parses a list of tokens and returns a list of syntax trees, one tree 842 per parsed SQL statement. 843 844 Args: 845 raw_tokens: the list of tokens. 846 sql: the original SQL string, used to produce helpful debug messages. 847 848 Returns: 849 The list of syntax trees. 850 """ 851 return self._parse( 852 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 853 ) 854 855 def parse_into( 856 self, 857 expression_types: exp.IntoType, 858 raw_tokens: t.List[Token], 859 sql: t.Optional[str] = None, 860 ) -> t.List[t.Optional[exp.Expression]]: 861 """ 862 Parses a list of tokens into a given Expression type. If a collection of Expression 863 types is given instead, this method will try to parse the token list into each one 864 of them, stopping at the first for which the parsing succeeds. 
865 866 Args: 867 expression_types: the expression type(s) to try and parse the token list into. 868 raw_tokens: the list of tokens. 869 sql: the original SQL string, used to produce helpful debug messages. 870 871 Returns: 872 The target Expression. 873 """ 874 errors = [] 875 for expression_type in ensure_collection(expression_types): 876 parser = self.EXPRESSION_PARSERS.get(expression_type) 877 if not parser: 878 raise TypeError(f"No parser registered for {expression_type}") 879 try: 880 return self._parse(parser, raw_tokens, sql) 881 except ParseError as e: 882 e.errors[0]["into_expression"] = expression_type 883 errors.append(e) 884 raise ParseError( 885 f"Failed to parse into {expression_types}", 886 errors=merge_errors(errors), 887 ) from errors[-1] 888 889 def _parse( 890 self, 891 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 892 raw_tokens: t.List[Token], 893 sql: t.Optional[str] = None, 894 ) -> t.List[t.Optional[exp.Expression]]: 895 self.reset() 896 self.sql = sql or "" 897 total = len(raw_tokens) 898 chunks: t.List[t.List[Token]] = [[]] 899 900 for i, token in enumerate(raw_tokens): 901 if token.token_type == TokenType.SEMICOLON: 902 if i < total - 1: 903 chunks.append([]) 904 else: 905 chunks[-1].append(token) 906 907 expressions = [] 908 909 for tokens in chunks: 910 self._index = -1 911 self._tokens = tokens 912 self._advance() 913 914 expressions.append(parse_method(self)) 915 916 if self._index < len(self._tokens): 917 self.raise_error("Invalid expression / Unexpected token") 918 919 self.check_errors() 920 921 return expressions 922 923 def check_errors(self) -> None: 924 """ 925 Logs or raises any found errors, depending on the chosen error level setting. 
926 """ 927 if self.error_level == ErrorLevel.WARN: 928 for error in self.errors: 929 logger.error(str(error)) 930 elif self.error_level == ErrorLevel.RAISE and self.errors: 931 raise ParseError( 932 concat_messages(self.errors, self.max_errors), 933 errors=merge_errors(self.errors), 934 ) 935 936 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 937 """ 938 Appends an error in the list of recorded errors or raises it, depending on the chosen 939 error level setting. 940 """ 941 token = token or self._curr or self._prev or Token.string("") 942 start = token.start 943 end = token.end 944 start_context = self.sql[max(start - self.error_message_context, 0) : start] 945 highlight = self.sql[start:end] 946 end_context = self.sql[end : end + self.error_message_context] 947 948 error = ParseError.new( 949 f"{message}. Line {token.line}, Col: {token.col}.\n" 950 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 951 description=message, 952 line=token.line, 953 col=token.col, 954 start_context=start_context, 955 highlight=highlight, 956 end_context=end_context, 957 ) 958 959 if self.error_level == ErrorLevel.IMMEDIATE: 960 raise error 961 962 self.errors.append(error) 963 964 def expression( 965 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 966 ) -> E: 967 """ 968 Creates a new, validated Expression. 969 970 Args: 971 exp_class: the expression class to instantiate. 972 comments: an optional list of comments to attach to the expression. 973 kwargs: the arguments to set for the expression along with their respective values. 974 975 Returns: 976 The target expression. 
977 """ 978 instance = exp_class(**kwargs) 979 instance.add_comments(comments) if comments else self._add_comments(instance) 980 self.validate_expression(instance) 981 return instance 982 983 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 984 if expression and self._prev_comments: 985 expression.add_comments(self._prev_comments) 986 self._prev_comments = None 987 988 def validate_expression( 989 self, expression: exp.Expression, args: t.Optional[t.List] = None 990 ) -> None: 991 """ 992 Validates an already instantiated expression, making sure that all its mandatory arguments 993 are set. 994 995 Args: 996 expression: the expression to validate. 997 args: an optional list of items that was used to instantiate the expression, if it's a Func. 998 """ 999 if self.error_level == ErrorLevel.IGNORE: 1000 return 1001 1002 for error_message in expression.error_messages(args): 1003 self.raise_error(error_message) 1004 1005 def _find_sql(self, start: Token, end: Token) -> str: 1006 return self.sql[start.start : end.end] 1007 1008 def _advance(self, times: int = 1) -> None: 1009 self._index += times 1010 self._curr = seq_get(self._tokens, self._index) 1011 self._next = seq_get(self._tokens, self._index + 1) 1012 if self._index > 0: 1013 self._prev = self._tokens[self._index - 1] 1014 self._prev_comments = self._prev.comments 1015 else: 1016 self._prev = None 1017 self._prev_comments = None 1018 1019 def _retreat(self, index: int) -> None: 1020 if index != self._index: 1021 self._advance(index - self._index) 1022 1023 def _parse_command(self) -> exp.Command: 1024 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1025 1026 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1027 start = self._prev 1028 exists = self._parse_exists() if allow_exists else None 1029 1030 self._match(TokenType.ON) 1031 1032 kind = self._match_set(self.CREATABLES) and self._prev 1033 1034 if not kind: 1035 return 
    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """Parses a ClickHouse TTL clause into an exp.MergeTreeTTL node."""

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # A TTL expression, optionally followed by the action to take
            # (DELETE / RECOMPRESS / TO DISK / TO VOLUME) when it fires.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # `GROUP BY ... SET agg, ...` aggregates only make sense with a group.
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parses one statement, dispatching on the current token type."""
        if self._curr is None:
            return None

        # Registered statement parsers (SELECT, CREATE, DROP, ...) first.
        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        # Tokens the tokenizer flags as opaque commands are kept as-is.
        if self._match_set(Tokenizer.COMMANDS):
            return self._parse_command()

        # Otherwise treat the input as a bare expression or a SELECT.
        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parses a DROP statement; falls back to exp.Command for unknown objects."""
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match(TokenType.MATERIALIZED)
        kind = self._match_set(self.CREATABLES) and self._prev.text
        if not kind:
            # Unknown creatable kind: keep the raw text as a command.
            return self._parse_as_command(start)

        return self.expression(
            exp.Drop,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
            kind=kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match(TokenType.CASCADE),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Consumes `IF [NOT] EXISTS`; returns a truthy value iff it was present."""
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
self._parse_as_command(start) 1150 1151 exists = self._parse_exists(not_=True) 1152 this = None 1153 expression = None 1154 indexes = None 1155 no_schema_binding = None 1156 begin = None 1157 clone = None 1158 1159 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1160 this = self._parse_user_defined_function(kind=create_token.token_type) 1161 temp_properties = self._parse_properties() 1162 if properties and temp_properties: 1163 properties.expressions.extend(temp_properties.expressions) 1164 elif temp_properties: 1165 properties = temp_properties 1166 1167 self._match(TokenType.ALIAS) 1168 begin = self._match(TokenType.BEGIN) 1169 return_ = self._match_text_seq("RETURN") 1170 expression = self._parse_statement() 1171 1172 if return_: 1173 expression = self.expression(exp.Return, this=expression) 1174 elif create_token.token_type == TokenType.INDEX: 1175 this = self._parse_index() 1176 elif create_token.token_type in self.DB_CREATABLES: 1177 table_parts = self._parse_table_parts(schema=True) 1178 1179 # exp.Properties.Location.POST_NAME 1180 if self._match(TokenType.COMMA): 1181 temp_properties = self._parse_properties(before=True) 1182 if properties and temp_properties: 1183 properties.expressions.extend(temp_properties.expressions) 1184 elif temp_properties: 1185 properties = temp_properties 1186 1187 this = self._parse_schema(this=table_parts) 1188 1189 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1190 temp_properties = self._parse_properties() 1191 if properties and temp_properties: 1192 properties.expressions.extend(temp_properties.expressions) 1193 elif temp_properties: 1194 properties = temp_properties 1195 1196 self._match(TokenType.ALIAS) 1197 1198 # exp.Properties.Location.POST_ALIAS 1199 if not ( 1200 self._match(TokenType.SELECT, advance=False) 1201 or self._match(TokenType.WITH, advance=False) 1202 or self._match(TokenType.L_PAREN, advance=False) 1203 ): 1204 temp_properties = self._parse_properties() 1205 if properties 
and temp_properties: 1206 properties.expressions.extend(temp_properties.expressions) 1207 elif temp_properties: 1208 properties = temp_properties 1209 1210 expression = self._parse_ddl_select() 1211 1212 if create_token.token_type == TokenType.TABLE: 1213 # exp.Properties.Location.POST_EXPRESSION 1214 temp_properties = self._parse_properties() 1215 if properties and temp_properties: 1216 properties.expressions.extend(temp_properties.expressions) 1217 elif temp_properties: 1218 properties = temp_properties 1219 1220 indexes = [] 1221 while True: 1222 index = self._parse_create_table_index() 1223 1224 # exp.Properties.Location.POST_INDEX 1225 if self._match(TokenType.PARTITION_BY, advance=False): 1226 temp_properties = self._parse_properties() 1227 if properties and temp_properties: 1228 properties.expressions.extend(temp_properties.expressions) 1229 elif temp_properties: 1230 properties = temp_properties 1231 1232 if not index: 1233 break 1234 else: 1235 indexes.append(index) 1236 elif create_token.token_type == TokenType.VIEW: 1237 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1238 no_schema_binding = True 1239 1240 if self._match_text_seq("CLONE"): 1241 clone = self._parse_table(schema=True) 1242 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1243 clone_kind = ( 1244 self._match(TokenType.L_PAREN) 1245 and self._match_texts(self.CLONE_KINDS) 1246 and self._prev.text.upper() 1247 ) 1248 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1249 self._match(TokenType.R_PAREN) 1250 clone = self.expression( 1251 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1252 ) 1253 1254 return self.expression( 1255 exp.Create, 1256 this=this, 1257 kind=create_token.text, 1258 replace=replace, 1259 unique=unique, 1260 expression=expression, 1261 exists=exists, 1262 properties=properties, 1263 indexes=indexes, 1264 no_schema_binding=no_schema_binding, 1265 begin=begin, 1266 clone=clone, 1267 
) 1268 1269 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1270 self._match(TokenType.COMMA) 1271 1272 # parsers look to _prev for no/dual/default, so need to consume first 1273 self._match_text_seq("NO") 1274 self._match_text_seq("DUAL") 1275 self._match_text_seq("DEFAULT") 1276 1277 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1278 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1279 1280 return None 1281 1282 def _parse_property(self) -> t.Optional[exp.Expression]: 1283 if self._match_texts(self.PROPERTY_PARSERS): 1284 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1285 1286 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1287 return self._parse_character_set(default=True) 1288 1289 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1290 return self._parse_sortkey(compound=True) 1291 1292 if self._match_text_seq("SQL", "SECURITY"): 1293 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1294 1295 assignment = self._match_pair( 1296 TokenType.VAR, TokenType.EQ, advance=False 1297 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1298 1299 if assignment: 1300 key = self._parse_var_or_string() 1301 self._match(TokenType.EQ) 1302 return self.expression(exp.Property, this=key, value=self._parse_column()) 1303 1304 return None 1305 1306 def _parse_stored(self) -> exp.Expression: 1307 self._match(TokenType.ALIAS) 1308 1309 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1310 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1311 1312 return self.expression( 1313 exp.FileFormatProperty, 1314 this=self.expression( 1315 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1316 ) 1317 if input_format or output_format 1318 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1319 ) 1320 1321 def 
_parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1322 self._match(TokenType.EQ) 1323 self._match(TokenType.ALIAS) 1324 return self.expression(exp_class, this=self._parse_field()) 1325 1326 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1327 properties = [] 1328 1329 while True: 1330 if before: 1331 identified_property = self._parse_property_before() 1332 else: 1333 identified_property = self._parse_property() 1334 1335 if not identified_property: 1336 break 1337 for p in ensure_list(identified_property): 1338 properties.append(p) 1339 1340 if properties: 1341 return self.expression(exp.Properties, expressions=properties) 1342 1343 return None 1344 1345 def _parse_fallback(self, no=False) -> exp.Expression: 1346 self._match_text_seq("FALLBACK") 1347 return self.expression( 1348 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1349 ) 1350 1351 def _parse_volatile_property(self) -> exp.Expression: 1352 if self._index >= 2: 1353 pre_volatile_token = self._tokens[self._index - 2] 1354 else: 1355 pre_volatile_token = None 1356 1357 if pre_volatile_token and pre_volatile_token.token_type in ( 1358 TokenType.CREATE, 1359 TokenType.REPLACE, 1360 TokenType.UNIQUE, 1361 ): 1362 return exp.VolatileProperty() 1363 1364 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1365 1366 def _parse_with_property( 1367 self, 1368 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1369 self._match(TokenType.WITH) 1370 if self._match(TokenType.L_PAREN, advance=False): 1371 return self._parse_wrapped_csv(self._parse_property) 1372 1373 if self._match_text_seq("JOURNAL"): 1374 return self._parse_withjournaltable() 1375 1376 if self._match_text_seq("DATA"): 1377 return self._parse_withdata(no=False) 1378 elif self._match_text_seq("NO", "DATA"): 1379 return self._parse_withdata(no=True) 1380 1381 if not self._next: 1382 return None 1383 1384 return 
    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.Expression]:
        """Parses MySQL's `DEFINER = user@host` clause."""
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)  # the '@' separator
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.Expression:
        """Parses Teradata's `WITH JOURNAL TABLE = name`."""
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no=False) -> exp.Expression:
        """Parses `[NO] LOG` (the NO was consumed by the caller)."""
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        """Parses `[NO|DUAL] [BEFORE] JOURNAL`."""
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)

    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        """Parses `[NOT] [LOCAL] AFTER JOURNAL`."""
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)

    def _parse_checksum(self) -> exp.Expression:
        """Parses `CHECKSUM = ON|OFF|DEFAULT`; `on` stays None if unspecified."""
        self._match_text_seq("CHECKSUM")
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False
        default = self._match(TokenType.DEFAULT)

        return self.expression(
            exp.ChecksumProperty,
            on=on,
            default=default,
        )

    def _parse_freespace(self) -> exp.Expression:
        """Parses Teradata's `FREESPACE = n [PERCENT]`."""
        self._match_text_seq("FREESPACE")
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parses `MERGEBLOCKRATIO [= n [PERCENT]]` (or its NO/DEFAULT forms)."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parses `[DEFAULT|MIN|MAX] DATABLOCKSIZE [= n [BYTES|KBYTES|KILOBYTES]]`."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parses `BLOCKCOMPRESSION = ALWAYS|MANUAL|NEVER|DEFAULT|AUTOTEMP(...)`."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parses `WITH [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL|INSERT|NONE]`."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parses a Teradata LOCKING modifier (kind, target, FOR/IN, lock type, OVERRIDE)."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target name; ROW locking does not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parses `PARTITION BY expr, ...`; returns [] when absent."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []
1570 return self.expression( 1571 exp.PartitionedByProperty, 1572 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 1573 ) 1574 1575 def _parse_withdata(self, no=False) -> exp.Expression: 1576 if self._match_text_seq("AND", "STATISTICS"): 1577 statistics = True 1578 elif self._match_text_seq("AND", "NO", "STATISTICS"): 1579 statistics = False 1580 else: 1581 statistics = None 1582 1583 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 1584 1585 def _parse_noprimaryindex(self) -> exp.Expression: 1586 self._match_text_seq("PRIMARY", "INDEX") 1587 return exp.NoPrimaryIndexProperty() 1588 1589 def _parse_oncommit(self) -> exp.Expression: 1590 self._match_text_seq("COMMIT", "PRESERVE", "ROWS") 1591 return exp.OnCommitProperty() 1592 1593 def _parse_distkey(self) -> exp.Expression: 1594 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 1595 1596 def _parse_create_like(self) -> t.Optional[exp.Expression]: 1597 table = self._parse_table(schema=True) 1598 options = [] 1599 while self._match_texts(("INCLUDING", "EXCLUDING")): 1600 this = self._prev.text.upper() 1601 id_var = self._parse_id_var() 1602 1603 if not id_var: 1604 return None 1605 1606 options.append( 1607 self.expression( 1608 exp.Property, 1609 this=this, 1610 value=exp.Var(this=id_var.this.upper()), 1611 ) 1612 ) 1613 return self.expression(exp.LikeProperty, this=table, expressions=options) 1614 1615 def _parse_sortkey(self, compound: bool = False) -> exp.Expression: 1616 return self.expression( 1617 exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound 1618 ) 1619 1620 def _parse_character_set(self, default: bool = False) -> exp.Expression: 1621 self._match(TokenType.EQ) 1622 return self.expression( 1623 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 1624 ) 1625 1626 def _parse_returns(self) -> exp.Expression: 1627 value: t.Optional[exp.Expression] 1628 is_table 
    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Parses `[GLOBAL] TEMPORARY` into an exp.TemporaryProperty."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        """Parses `DESCRIBE [kind] table`."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parses an INSERT statement (table or DIRECTORY target)."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # e.g. sqlite's INSERT OR REPLACE/IGNORE/...
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parses `ON CONFLICT ...` / MySQL's `ON DUPLICATE KEY ...`."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Either a named constraint or a conflict-target key list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parses `RETURNING col, ...` if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        """Parses `ROW FORMAT ...` (the ROW token was already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()
    def _parse_load_data(self) -> exp.Expression:
        """Parses Hive's `LOAD DATA [LOCAL] INPATH ... INTO TABLE ...`."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parses a DELETE statement (the DELETE token was already consumed)."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parses an UPDATE statement (the UPDATE token was already consumed)."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parses Spark's `UNCACHE TABLE [IF EXISTS] table`."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parses Spark's `CACHE [LAZY] TABLE t [OPTIONS('k' = 'v')] [AS select]`."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parses `PARTITION (expr, ...)` if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Parses a SELECT-like query: WITH-prefixed statement, plain SELECT,
        parenthesized subquery (when nested/table), or a VALUES clause.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery's SELECT AS STRUCT / AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)  # e.g. T-SQL's SELECT TOP n
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parses a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        """Parses one CTE: `alias [AS] (statement)`."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parses `[AS] alias [(col, ...)]`; returns None when neither part is present."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If no columns parsed, the paren wasn't a column list: rewind.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wraps `this` in an exp.Subquery, consuming pivots and an optional alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Runs every registered query-modifier parser (WHERE, GROUP, LIMIT, ...) on `this`."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parses an optimizer hint block `/*+ ... */` if present."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parses `INTO [TEMPORARY|UNLOGGED] [TABLE] target` if present."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        """Parses a FROM clause if present, optionally applying query modifiers."""
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )
self._match(TokenType.MATCH_RECOGNIZE): 2042 return None 2043 2044 self._match_l_paren() 2045 2046 partition = self._parse_partition_by() 2047 order = self._parse_order() 2048 measures = ( 2049 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2050 ) 2051 2052 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2053 rows = exp.Var(this="ONE ROW PER MATCH") 2054 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2055 text = "ALL ROWS PER MATCH" 2056 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2057 text += f" SHOW EMPTY MATCHES" 2058 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2059 text += f" OMIT EMPTY MATCHES" 2060 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2061 text += f" WITH UNMATCHED ROWS" 2062 rows = exp.Var(this=text) 2063 else: 2064 rows = None 2065 2066 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2067 text = "AFTER MATCH SKIP" 2068 if self._match_text_seq("PAST", "LAST", "ROW"): 2069 text += f" PAST LAST ROW" 2070 elif self._match_text_seq("TO", "NEXT", "ROW"): 2071 text += f" TO NEXT ROW" 2072 elif self._match_text_seq("TO", "FIRST"): 2073 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2074 elif self._match_text_seq("TO", "LAST"): 2075 text += f" TO LAST {self._advance_any().text}" # type: ignore 2076 after = exp.Var(this=text) 2077 else: 2078 after = None 2079 2080 if self._match_text_seq("PATTERN"): 2081 self._match_l_paren() 2082 2083 if not self._curr: 2084 self.raise_error("Expecting )", self._curr) 2085 2086 paren = 1 2087 start = self._curr 2088 2089 while self._curr and paren > 0: 2090 if self._curr.token_type == TokenType.L_PAREN: 2091 paren += 1 2092 if self._curr.token_type == TokenType.R_PAREN: 2093 paren -= 1 2094 end = self._prev 2095 self._advance() 2096 if paren > 0: 2097 self.raise_error("Expecting )", self._curr) 2098 pattern = exp.Var(this=self._find_sql(start, end)) 2099 else: 2100 pattern = None 2101 2102 define = ( 2103 
self._parse_csv( 2104 lambda: self.expression( 2105 exp.Alias, 2106 alias=self._parse_id_var(any_token=True), 2107 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2108 ) 2109 ) 2110 if self._match_text_seq("DEFINE") 2111 else None 2112 ) 2113 2114 self._match_r_paren() 2115 2116 return self.expression( 2117 exp.MatchRecognize, 2118 partition_by=partition, 2119 order=order, 2120 measures=measures, 2121 rows=rows, 2122 after=after, 2123 pattern=pattern, 2124 define=define, 2125 alias=self._parse_table_alias(), 2126 ) 2127 2128 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2129 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2130 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2131 2132 if outer_apply or cross_apply: 2133 this = self._parse_select(table=True) 2134 view = None 2135 outer = not cross_apply 2136 elif self._match(TokenType.LATERAL): 2137 this = self._parse_select(table=True) 2138 view = self._match(TokenType.VIEW) 2139 outer = self._match(TokenType.OUTER) 2140 else: 2141 return None 2142 2143 if not this: 2144 this = self._parse_function() or self._parse_id_var(any_token=False) 2145 while self._match(TokenType.DOT): 2146 this = exp.Dot( 2147 this=this, 2148 expression=self._parse_function() or self._parse_id_var(any_token=False), 2149 ) 2150 2151 table_alias: t.Optional[exp.Expression] 2152 2153 if view: 2154 table = self._parse_id_var(any_token=False) 2155 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2156 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2157 else: 2158 table_alias = self._parse_table_alias() 2159 2160 expression = self.expression( 2161 exp.Lateral, 2162 this=this, 2163 view=view, 2164 outer=outer, 2165 alias=table_alias, 2166 ) 2167 2168 return expression 2169 2170 def _parse_join_side_and_kind( 2171 self, 2172 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2173 return ( 2174 
    def _parse_join_side_and_kind(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Consume the optional NATURAL, join side, and join kind tokens that may
        precede JOIN, returning the matched tokens (each None when absent)."""
        return (
            self._match(TokenType.NATURAL) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse one join clause (JOIN / comma / OUTER APPLY / CROSS APPLY), or None."""
        # A bare comma between tables is treated as an (implicit) join.
        if self._match(TokenType.COMMA):
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # Not actually a join — rewind past any side/kind tokens consumed above.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is represented as a LEFT-sided join.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore

    def _parse_index(self) -> exp.Expression:
        """Parse an index reference: name [ON [TABLE] table] columns."""
        index = self._parse_id_var()
        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive

        return self.expression(
            exp.Index,
            this=index,
            table=self.expression(exp.Table, this=self._parse_id_var()),
            columns=self._parse_expression(),
        )
    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
        """Parse an index definition inside CREATE TABLE:
        [UNIQUE] [PRIMARY] [AMP] INDEX name [(columns)]."""
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")
        if not self._match(TokenType.INDEX):
            return None
        index = self._parse_id_var()
        columns = None
        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_column)
        return self.expression(
            exp.Index,
            this=index,
            columns=columns,
            unique=unique,
            primary=primary,
            amp=amp,
        )

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (identifier, string, function,
        or placeholder); functions are not allowed when parsing a schema target."""
        return (
            (not schema and self._parse_function())
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse [catalog.][db.]table (with arbitrary extra dots folded into exp.Dot)."""
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous table becomes db, previous db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )

    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a plain table
        with optional alias, pivots, hints and TABLESAMPLE."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place TABLESAMPLE before the alias, others after.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # Table hints, e.g. WITH (NOLOCK).
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this

    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects like BigQuery alias the produced column, not the table.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
not is_derived and not self._match(TokenType.VALUES): 2367 return None 2368 2369 expressions = self._parse_csv(self._parse_value) 2370 2371 if is_derived: 2372 self._match_r_paren() 2373 2374 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2375 2376 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2377 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2378 as_modifier and self._match_text_seq("USING", "SAMPLE") 2379 ): 2380 return None 2381 2382 bucket_numerator = None 2383 bucket_denominator = None 2384 bucket_field = None 2385 percent = None 2386 rows = None 2387 size = None 2388 seed = None 2389 2390 kind = ( 2391 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2392 ) 2393 method = self._parse_var(tokens=(TokenType.ROW,)) 2394 2395 self._match(TokenType.L_PAREN) 2396 2397 num = self._parse_number() 2398 2399 if self._match(TokenType.BUCKET): 2400 bucket_numerator = self._parse_number() 2401 self._match(TokenType.OUT_OF) 2402 bucket_denominator = bucket_denominator = self._parse_number() 2403 self._match(TokenType.ON) 2404 bucket_field = self._parse_field() 2405 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2406 percent = num 2407 elif self._match(TokenType.ROWS): 2408 rows = num 2409 else: 2410 size = num 2411 2412 self._match(TokenType.R_PAREN) 2413 2414 if self._match(TokenType.L_PAREN): 2415 method = self._parse_var() 2416 seed = self._match(TokenType.COMMA) and self._parse_number() 2417 self._match_r_paren() 2418 elif self._match_texts(("SEED", "REPEATABLE")): 2419 seed = self._parse_wrapped(self._parse_number) 2420 2421 return self.expression( 2422 exp.TableSample, 2423 method=method, 2424 bucket_numerator=bucket_numerator, 2425 bucket_denominator=bucket_denominator, 2426 bucket_field=bucket_field, 2427 percent=percent, 2428 rows=rows, 2429 size=size, 2430 seed=seed, 2431 kind=kind, 2432 ) 2433 2434 def 
    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse consecutive PIVOT/UNPIVOT clauses.

        Uses the two-argument iter(callable, sentinel) form: _parse_pivot is
        called repeatedly until it returns None.
        """
        return list(iter(self._parse_pivot, None))

    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT(... FOR col IN (...)) clause, or None."""
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # PIVOT/UNPIVOT not followed by '(' isn't a pivot clause — rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may take an alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Derive the output column names from the aggregations and IN values.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    # Dialect flag decides whether the aggregation name prefixes
                    # or suffixes the field value in the generated column name.
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause into exp.Where, or None if absent."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, accumulating plain expressions, GROUPING SETS,
        ROLLUP, CUBE and WITH TOTALS into one exp.Group node."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        # Accumulates each modifier kind across loop iterations.
        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store True; ROLLUP(...) / CUBE(...) store columns.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            # Stop once no modifier was found in this pass.
            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS ( ... ) into a list of grouping sets, or None."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)
self._parse_column() 2558 2559 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]: 2560 if not skip_having_token and not self._match(TokenType.HAVING): 2561 return None 2562 return self.expression(exp.Having, this=self._parse_conjunction()) 2563 2564 def _parse_qualify(self) -> t.Optional[exp.Expression]: 2565 if not self._match(TokenType.QUALIFY): 2566 return None 2567 return self.expression(exp.Qualify, this=self._parse_conjunction()) 2568 2569 def _parse_order( 2570 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 2571 ) -> t.Optional[exp.Expression]: 2572 if not skip_order_token and not self._match(TokenType.ORDER_BY): 2573 return this 2574 2575 return self.expression( 2576 exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered) 2577 ) 2578 2579 def _parse_sort( 2580 self, token_type: TokenType, exp_class: t.Type[exp.Expression] 2581 ) -> t.Optional[exp.Expression]: 2582 if not self._match(token_type): 2583 return None 2584 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 2585 2586 def _parse_ordered(self) -> exp.Expression: 2587 this = self._parse_conjunction() 2588 self._match(TokenType.ASC) 2589 is_desc = self._match(TokenType.DESC) 2590 is_nulls_first = self._match(TokenType.NULLS_FIRST) 2591 is_nulls_last = self._match(TokenType.NULLS_LAST) 2592 desc = is_desc or False 2593 asc = not desc 2594 nulls_first = is_nulls_first or False 2595 explicitly_null_ordered = is_nulls_first or is_nulls_last 2596 if ( 2597 not explicitly_null_ordered 2598 and ( 2599 (asc and self.null_ordering == "nulls_are_small") 2600 or (desc and self.null_ordering != "nulls_are_small") 2601 ) 2602 and self.null_ordering != "nulls_are_last" 2603 ): 2604 nulls_first = True 2605 2606 return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first) 2607 2608 def _parse_limit( 2609 self, this: t.Optional[exp.Expression] = None, top: bool = False 2610 ) -> 
t.Optional[exp.Expression]: 2611 if self._match(TokenType.TOP if top else TokenType.LIMIT): 2612 limit_paren = self._match(TokenType.L_PAREN) 2613 limit_exp = self.expression( 2614 exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term() 2615 ) 2616 2617 if limit_paren: 2618 self._match_r_paren() 2619 2620 return limit_exp 2621 2622 if self._match(TokenType.FETCH): 2623 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 2624 direction = self._prev.text if direction else "FIRST" 2625 2626 count = self._parse_number() 2627 percent = self._match(TokenType.PERCENT) 2628 2629 self._match_set((TokenType.ROW, TokenType.ROWS)) 2630 2631 only = self._match(TokenType.ONLY) 2632 with_ties = self._match_text_seq("WITH", "TIES") 2633 2634 if only and with_ties: 2635 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 2636 2637 return self.expression( 2638 exp.Fetch, 2639 direction=direction, 2640 count=count, 2641 percent=percent, 2642 with_ties=with_ties, 2643 ) 2644 2645 return this 2646 2647 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 2648 if not self._match_set((TokenType.OFFSET, TokenType.COMMA)): 2649 return this 2650 2651 count = self._parse_number() 2652 self._match_set((TokenType.ROW, TokenType.ROWS)) 2653 return self.expression(exp.Offset, this=this, expression=count) 2654 2655 def _parse_lock(self) -> t.Optional[exp.Expression]: 2656 if self._match_text_seq("FOR", "UPDATE"): 2657 return self.expression(exp.Lock, update=True) 2658 if self._match_text_seq("FOR", "SHARE"): 2659 return self.expression(exp.Lock, update=False) 2660 2661 return None 2662 2663 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2664 if not self._match_set(self.SET_OPERATIONS): 2665 return this 2666 2667 token_type = self._prev.token_type 2668 2669 if token_type == TokenType.UNION: 2670 expression = exp.Union 2671 elif token_type == 
    def _parse_expression(self, explicit_alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a full expression, including an optional (possibly explicit) alias."""
        return self._parse_alias(self._parse_conjunction(), explicit=explicit_alias)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Precedence ladder: AND/OR level, delegating operands to equality."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Precedence ladder: equality level, delegating operands to comparison."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Precedence ladder: comparison level, delegating operands to range."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)
2709 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 2710 if self._match(TokenType.NOTNULL): 2711 this = self.expression(exp.Is, this=this, expression=exp.Null()) 2712 this = self.expression(exp.Not, this=this) 2713 2714 if negate: 2715 this = self.expression(exp.Not, this=this) 2716 2717 if self._match(TokenType.IS): 2718 this = self._parse_is(this) 2719 2720 return this 2721 2722 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2723 index = self._index - 1 2724 negate = self._match(TokenType.NOT) 2725 if self._match(TokenType.DISTINCT_FROM): 2726 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 2727 return self.expression(klass, this=this, expression=self._parse_expression()) 2728 2729 expression = self._parse_null() or self._parse_boolean() 2730 if not expression: 2731 self._retreat(index) 2732 return None 2733 2734 this = self.expression(exp.Is, this=this, expression=expression) 2735 return self.expression(exp.Not, this=this) if negate else this 2736 2737 def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression: 2738 unnest = self._parse_unnest() 2739 if unnest: 2740 this = self.expression(exp.In, this=this, unnest=unnest) 2741 elif self._match(TokenType.L_PAREN): 2742 expressions = self._parse_csv(self._parse_select_or_expression) 2743 2744 if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable): 2745 this = self.expression(exp.In, this=this, query=expressions[0]) 2746 else: 2747 this = self.expression(exp.In, this=this, expressions=expressions) 2748 2749 self._match_r_paren(this) 2750 else: 2751 this = self.expression(exp.In, this=this, field=self._parse_field()) 2752 2753 return this 2754 2755 def _parse_between(self, this: exp.Expression) -> exp.Expression: 2756 low = self._parse_bitwise() 2757 self._match(TokenType.AND) 2758 high = self._parse_bitwise() 2759 return self.expression(exp.Between, this=this, low=low, high=high) 2760 2761 def _parse_escape(self, this: 
t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 2762 if not self._match(TokenType.ESCAPE): 2763 return this 2764 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 2765 2766 def _parse_interval(self) -> t.Optional[exp.Expression]: 2767 if not self._match(TokenType.INTERVAL): 2768 return None 2769 2770 this = self._parse_primary() or self._parse_term() 2771 unit = self._parse_function() or self._parse_var() 2772 2773 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 2774 # each INTERVAL expression into this canonical form so it's easy to transpile 2775 if this and isinstance(this, exp.Literal): 2776 if this.is_number: 2777 this = exp.Literal.string(this.name) 2778 2779 # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year' 2780 parts = this.name.split() 2781 if not unit and len(parts) <= 2: 2782 this = exp.Literal.string(seq_get(parts, 0)) 2783 unit = self.expression(exp.Var, this=seq_get(parts, 1)) 2784 2785 return self.expression(exp.Interval, this=this, unit=unit) 2786 2787 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 2788 this = self._parse_term() 2789 2790 while True: 2791 if self._match_set(self.BITWISE): 2792 this = self.expression( 2793 self.BITWISE[self._prev.token_type], 2794 this=this, 2795 expression=self._parse_term(), 2796 ) 2797 elif self._match_pair(TokenType.LT, TokenType.LT): 2798 this = self.expression( 2799 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 2800 ) 2801 elif self._match_pair(TokenType.GT, TokenType.GT): 2802 this = self.expression( 2803 exp.BitwiseRightShift, this=this, expression=self._parse_term() 2804 ) 2805 else: 2806 break 2807 2808 return this 2809 2810 def _parse_term(self) -> t.Optional[exp.Expression]: 2811 return self._parse_tokens(self._parse_factor, self.TERM) 2812 2813 def _parse_factor(self) -> t.Optional[exp.Expression]: 2814 return self._parse_tokens(self._parse_unary, self.FACTOR) 2815 2816 def 
    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary operator if present, otherwise a typed/AT TIME ZONE operand."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a "TYPE literal" cast (e.g. DATE '2020-01-01'),
        or fall back to a plain column expression."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # Dialect-specific literal parsers may override the generic Cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by something else wasn't a type
                # after all — rewind and reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type-size argument, e.g. the 30 in VARCHAR(30), with an
        optional trailing variable (e.g. a unit keyword)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, handling nested types, array brackets, timestamp
        time-zone variants, intervals and parenthesized size/values arguments.

        Args:
            check_func: when True, rewind if the parsed "type" is immediately
                followed by a string — it is then likely a function call instead.

        Returns:
            An exp.DataType (or exp.Interval / exp.PseudoType), or None after
            rewinding when no type could be recognized.
        """
        index = self._index

        # Teradata: types may be qualified with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A type name with parenthesized args could also be a function call.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # T[] (and T[][], ...) become nested ARRAY types.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this wasn't a type expression — rewind entirely.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>, STRUCT<a INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize the WITH/WITHOUT TIME ZONE variants onto concrete types.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            # Peek ahead: a string literal right after means this is most likely
            # a function call (e.g. DATE('...')), not a type — rewind fully.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )

    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: name[:]{type} as a column definition."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if that clause follows; otherwise pass through."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
self._parse_star() 3002 or self._parse_function(anonymous=True) 3003 or self._parse_id_var() 3004 ) 3005 3006 if isinstance(field, exp.Func): 3007 # bigquery allows function calls like x.y.count(...) 3008 # SAFE.SUBSTR(...) 3009 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 3010 this = self._replace_columns_with_dots(this) 3011 3012 if op: 3013 this = op(self, this, field) 3014 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 3015 this = self.expression( 3016 exp.Column, 3017 this=field, 3018 table=this.this, 3019 db=this.args.get("table"), 3020 catalog=this.args.get("db"), 3021 ) 3022 else: 3023 this = self.expression(exp.Dot, this=this, expression=field) 3024 this = self._parse_bracket(this) 3025 3026 return this 3027 3028 def _parse_primary(self) -> t.Optional[exp.Expression]: 3029 if self._match_set(self.PRIMARY_PARSERS): 3030 token_type = self._prev.token_type 3031 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 3032 3033 if token_type == TokenType.STRING: 3034 expressions = [primary] 3035 while self._match(TokenType.STRING): 3036 expressions.append(exp.Literal.string(self._prev.text)) 3037 if len(expressions) > 1: 3038 return self.expression(exp.Concat, expressions=expressions) 3039 return primary 3040 3041 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 3042 return exp.Literal.number(f"0.{self._prev.text}") 3043 3044 if self._match(TokenType.L_PAREN): 3045 comments = self._prev_comments 3046 query = self._parse_select() 3047 3048 if query: 3049 expressions = [query] 3050 else: 3051 expressions = self._parse_csv(lambda: self._parse_expression(explicit_alias=True)) 3052 3053 this = self._parse_query_modifiers(seq_get(expressions, 0)) 3054 3055 if isinstance(this, exp.Subqueryable): 3056 this = self._parse_set_operations( 3057 self._parse_subquery(this=this, parse_alias=False) 3058 ) 3059 elif len(expressions) > 1: 3060 this = self.expression(exp.Tuple, 
expressions=expressions) 3061 else: 3062 this = self.expression(exp.Paren, this=self._parse_set_operations(this)) 3063 3064 if this: 3065 this.add_comments(comments) 3066 self._match_r_paren(expression=this) 3067 3068 return this 3069 3070 return None 3071 3072 def _parse_field( 3073 self, 3074 any_token: bool = False, 3075 tokens: t.Optional[t.Collection[TokenType]] = None, 3076 ) -> t.Optional[exp.Expression]: 3077 return ( 3078 self._parse_primary() 3079 or self._parse_function() 3080 or self._parse_id_var(any_token=any_token, tokens=tokens) 3081 ) 3082 3083 def _parse_function( 3084 self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False 3085 ) -> t.Optional[exp.Expression]: 3086 if not self._curr: 3087 return None 3088 3089 token_type = self._curr.token_type 3090 3091 if self._match_set(self.NO_PAREN_FUNCTION_PARSERS): 3092 return self.NO_PAREN_FUNCTION_PARSERS[token_type](self) 3093 3094 if not self._next or self._next.token_type != TokenType.L_PAREN: 3095 if token_type in self.NO_PAREN_FUNCTIONS: 3096 self._advance() 3097 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 3098 3099 return None 3100 3101 if token_type not in self.FUNC_TOKENS: 3102 return None 3103 3104 this = self._curr.text 3105 upper = this.upper() 3106 self._advance(2) 3107 3108 parser = self.FUNCTION_PARSERS.get(upper) 3109 3110 if parser and not anonymous: 3111 this = parser(self) 3112 else: 3113 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 3114 3115 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 3116 this = self.expression(subquery_predicate, this=self._parse_select()) 3117 self._match_r_paren() 3118 return this 3119 3120 if functions is None: 3121 functions = self.FUNCTIONS 3122 3123 function = functions.get(upper) 3124 args = self._parse_csv(self._parse_lambda) 3125 3126 if function and not anonymous: 3127 this = function(args) 3128 self.validate_expression(this, args) 3129 else: 3130 this = 
self.expression(exp.Anonymous, this=this, expressions=args) 3131 3132 self._match_r_paren(this) 3133 return self._parse_window(this) 3134 3135 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 3136 return self._parse_column_def(self._parse_id_var()) 3137 3138 def _parse_user_defined_function( 3139 self, kind: t.Optional[TokenType] = None 3140 ) -> t.Optional[exp.Expression]: 3141 this = self._parse_id_var() 3142 3143 while self._match(TokenType.DOT): 3144 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 3145 3146 if not self._match(TokenType.L_PAREN): 3147 return this 3148 3149 expressions = self._parse_csv(self._parse_function_parameter) 3150 self._match_r_paren() 3151 return self.expression( 3152 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 3153 ) 3154 3155 def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]: 3156 literal = self._parse_primary() 3157 if literal: 3158 return self.expression(exp.Introducer, this=token.text, expression=literal) 3159 3160 return self.expression(exp.Identifier, this=token.text) 3161 3162 def _parse_national(self, token: Token) -> exp.Expression: 3163 return self.expression(exp.National, this=exp.Literal.string(token.text)) 3164 3165 def _parse_session_parameter(self) -> exp.Expression: 3166 kind = None 3167 this = self._parse_id_var() or self._parse_primary() 3168 3169 if this and self._match(TokenType.DOT): 3170 kind = this.name 3171 this = self._parse_var() or self._parse_primary() 3172 3173 return self.expression(exp.SessionParameter, this=this, kind=kind) 3174 3175 def _parse_lambda(self) -> t.Optional[exp.Expression]: 3176 index = self._index 3177 3178 if self._match(TokenType.L_PAREN): 3179 expressions = self._parse_csv(self._parse_id_var) 3180 3181 if not self._match(TokenType.R_PAREN): 3182 self._retreat(index) 3183 else: 3184 expressions = [self._parse_id_var()] 3185 3186 if self._match_set(self.LAMBDAS): 3187 return 
self.LAMBDAS[self._prev.token_type](self, expressions) 3188 3189 self._retreat(index) 3190 3191 this: t.Optional[exp.Expression] 3192 3193 if self._match(TokenType.DISTINCT): 3194 this = self.expression( 3195 exp.Distinct, expressions=self._parse_csv(self._parse_conjunction) 3196 ) 3197 else: 3198 this = self._parse_select_or_expression() 3199 3200 if isinstance(this, exp.EQ): 3201 left = this.this 3202 if isinstance(left, exp.Column): 3203 left.replace(exp.Var(this=left.text("this"))) 3204 3205 return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this))) 3206 3207 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 3208 index = self._index 3209 3210 try: 3211 if self._parse_select(nested=True): 3212 return this 3213 except Exception: 3214 pass 3215 finally: 3216 self._retreat(index) 3217 3218 if not self._match(TokenType.L_PAREN): 3219 return this 3220 3221 args = self._parse_csv( 3222 lambda: self._parse_constraint() 3223 or self._parse_column_def(self._parse_field(any_token=True)) 3224 ) 3225 self._match_r_paren() 3226 return self.expression(exp.Schema, this=this, expressions=args) 3227 3228 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 3229 # column defs are not really columns, they're identifiers 3230 if isinstance(this, exp.Column): 3231 this = this.this 3232 kind = self._parse_types() 3233 3234 if self._match_text_seq("FOR", "ORDINALITY"): 3235 return self.expression(exp.ColumnDef, this=this, ordinality=True) 3236 3237 constraints = [] 3238 while True: 3239 constraint = self._parse_column_constraint() 3240 if not constraint: 3241 break 3242 constraints.append(constraint) 3243 3244 if not kind and not constraints: 3245 return this 3246 3247 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 3248 3249 def _parse_auto_increment(self) -> exp.Expression: 3250 start = None 3251 increment = None 3252 3253 if 
self._match(TokenType.L_PAREN, advance=False): 3254 args = self._parse_wrapped_csv(self._parse_bitwise) 3255 start = seq_get(args, 0) 3256 increment = seq_get(args, 1) 3257 elif self._match_text_seq("START"): 3258 start = self._parse_bitwise() 3259 self._match_text_seq("INCREMENT") 3260 increment = self._parse_bitwise() 3261 3262 if start and increment: 3263 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 3264 3265 return exp.AutoIncrementColumnConstraint() 3266 3267 def _parse_compress(self) -> exp.Expression: 3268 if self._match(TokenType.L_PAREN, advance=False): 3269 return self.expression( 3270 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 3271 ) 3272 3273 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 3274 3275 def _parse_generated_as_identity(self) -> exp.Expression: 3276 if self._match(TokenType.BY_DEFAULT): 3277 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 3278 this = self.expression( 3279 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 3280 ) 3281 else: 3282 self._match_text_seq("ALWAYS") 3283 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 3284 3285 self._match_text_seq("AS", "IDENTITY") 3286 if self._match(TokenType.L_PAREN): 3287 if self._match_text_seq("START", "WITH"): 3288 this.set("start", self._parse_bitwise()) 3289 if self._match_text_seq("INCREMENT", "BY"): 3290 this.set("increment", self._parse_bitwise()) 3291 if self._match_text_seq("MINVALUE"): 3292 this.set("minvalue", self._parse_bitwise()) 3293 if self._match_text_seq("MAXVALUE"): 3294 this.set("maxvalue", self._parse_bitwise()) 3295 3296 if self._match_text_seq("CYCLE"): 3297 this.set("cycle", True) 3298 elif self._match_text_seq("NO", "CYCLE"): 3299 this.set("cycle", False) 3300 3301 self._match_r_paren() 3302 3303 return this 3304 3305 def _parse_inline(self) -> t.Optional[exp.Expression]: 3306 self._match_text_seq("LENGTH") 3307 
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        # NOT NULL / NOT CASESPECIFIC; anything else is not a NOT-constraint here.
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse an optionally named (CONSTRAINT <name>) column constraint."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        # A bare "CONSTRAINT <name>" with no recognized kind returns just the name.
        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint may carry several clauses; collect them all.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        # UNIQUE with no column list is a column constraint; with "(...)" it is
        # a table-level unique key.
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options (ON ..., DEFERRABLE, ...) as plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
        """Parse a REFERENCES clause; `match=False` assumes the keyword was consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        # ON DELETE / ON UPDATE actions, keyed by the lowercased event name.
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )

    def _parse_primary_key(self) -> exp.Expression:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # No "(...)": column-level PRIMARY KEY constraint.
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse (possibly chained) bracket/brace subscripts: slices, arrays, structs."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading ":" means an open-start slice, e.g. x[:2].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form; None for searched CASE.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(cond, true[, false]) or IF cond THEN ... [ELSE ...] END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # index points at the IF token itself so we can fully rewind.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)

    def _parse_extract(self) -> exp.Expression:
        # EXTRACT(<unit> FROM <expr>) — some dialects use a comma instead of FROM.
        this = self._parse_function() or self._parse_var() or self._parse_type()

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects exp.Cast vs exp.TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Two-argument form: cast to the type named by a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style aggregation across dialect variants."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr, type) / CONVERT(expr USING charset) into a cast."""
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)

    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Walk the (search, result) pairs; a trailing odd element is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: equal, or both sides NULL (DECODE treats
                # NULL as equal to NULL).
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)

    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
        # [KEY] <key> [:|VALUE] <value>
        self._match_text_seq("KEY")
        key = self._parse_field()
        self._match(TokenType.COLON)
        self._match_text_seq("VALUE")
        value = self._parse_field()
        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT(...) with its optional NULL-handling and key-uniqueness clauses."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Optional trailing "KEYS" of WITH/WITHOUT UNIQUE [KEYS].
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )

    def _parse_logarithm(self) -> exp.Expression:
        # Default argument order is base, expression
        args = self._parse_csv(self._parse_range)

        if len(args) > 1:
            if not self.LOG_BASE_FIRST:
                args.reverse()
            return exp.Log.from_arg_list(args)

        # Single argument: dialect decides whether LOG means LN.
        return self.expression(
            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
        )

    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL MATCH (cols) AGAINST (expr [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One entry of the WITH (...) column list: name, type, path, AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style calls; `haystack_first` flips the comma form's order."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this

    def _parse_join_hint(self, func_name: str) -> exp.Expression:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Expression:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this

    def _parse_trim(self) -> exp.Expression:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM string): first expression was the char set.
            this = self._parse_bitwise()
        else:
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )

    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse trailing window syntax (FILTER, WITHIN GROUP, OVER) around `this`."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name> (named window reference, no parens).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame clause: ROWS/RANGE [BETWEEN] <spec> [AND <spec>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame endpoint into its value and side (PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias; `explicit=True` requires the AS keyword."""
        any_token = self._match(TokenType.ALIAS)

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multi-alias form: t AS (a, b, c).
            aliases = self.expression(
                exp.Aliases,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token)

        if alias:
            return self.expression(exp.Alias, this=this, alias=alias)

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier or an identifier-like token (keywords allowed per dialect)."""
        identifier = self._parse_identifier()

        if identifier:
            return identifier

        prefix = ""

        if prefix_tokens:
            while self._match_set(prefix_tokens):
                prefix += self._prev.text

        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
            quoted = self._prev.token_type == TokenType.STRING
            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)

        return None

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STRING):
            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NUMBER):
            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(exp.Var, this=self._prev.text)
        return self._parse_placeholder()

    def _advance_any(self) -> t.Optional[Token]:
        # Consume the current token unless it is a reserved keyword.
        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
        return self._parse_var() or self._parse_string()

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.NULL):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return None

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return None

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return None

    def _parse_parameter(self) -> exp.Expression:
        # e.g. ${var} / @{var} style parameters; wrapped records the braces.
        wrapped = self._match(TokenType.L_BRACE)
        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
        self._match(TokenType.R_BRACE)
        return self.expression(exp.Parameter, this=this, wrapped=wrapped)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            # The registered parser declined; give the token back.
            self._advance(-1)
        return None

    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        if not self._match(TokenType.EXCEPT):
            return None
        if
self._match(TokenType.L_PAREN, advance=False): 4050 return self._parse_wrapped_csv(self._parse_column) 4051 return self._parse_csv(self._parse_column) 4052 4053 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4054 if not self._match(TokenType.REPLACE): 4055 return None 4056 if self._match(TokenType.L_PAREN, advance=False): 4057 return self._parse_wrapped_csv(self._parse_expression) 4058 return self._parse_csv(self._parse_expression) 4059 4060 def _parse_csv( 4061 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4062 ) -> t.List[t.Optional[exp.Expression]]: 4063 parse_result = parse_method() 4064 items = [parse_result] if parse_result is not None else [] 4065 4066 while self._match(sep): 4067 self._add_comments(parse_result) 4068 parse_result = parse_method() 4069 if parse_result is not None: 4070 items.append(parse_result) 4071 4072 return items 4073 4074 def _parse_tokens( 4075 self, parse_method: t.Callable, expressions: t.Dict 4076 ) -> t.Optional[exp.Expression]: 4077 this = parse_method() 4078 4079 while self._match_set(expressions): 4080 this = self.expression( 4081 expressions[self._prev.token_type], 4082 this=this, 4083 comments=self._prev_comments, 4084 expression=parse_method(), 4085 ) 4086 4087 return this 4088 4089 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4090 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4091 4092 def _parse_wrapped_csv( 4093 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4094 ) -> t.List[t.Optional[exp.Expression]]: 4095 return self._parse_wrapped( 4096 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4097 ) 4098 4099 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4100 wrapped = self._match(TokenType.L_PAREN) 4101 if not wrapped and not optional: 4102 self.raise_error("Expecting (") 4103 parse_result = parse_method() 
4104 if wrapped: 4105 self._match_r_paren() 4106 return parse_result 4107 4108 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4109 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 4110 4111 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4112 return self._parse_set_operations( 4113 self._parse_select(nested=True, parse_subquery_alias=False) 4114 ) 4115 4116 def _parse_transaction(self) -> exp.Expression: 4117 this = None 4118 if self._match_texts(self.TRANSACTION_KIND): 4119 this = self._prev.text 4120 4121 self._match_texts({"TRANSACTION", "WORK"}) 4122 4123 modes = [] 4124 while True: 4125 mode = [] 4126 while self._match(TokenType.VAR): 4127 mode.append(self._prev.text) 4128 4129 if mode: 4130 modes.append(" ".join(mode)) 4131 if not self._match(TokenType.COMMA): 4132 break 4133 4134 return self.expression(exp.Transaction, this=this, modes=modes) 4135 4136 def _parse_commit_or_rollback(self) -> exp.Expression: 4137 chain = None 4138 savepoint = None 4139 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4140 4141 self._match_texts({"TRANSACTION", "WORK"}) 4142 4143 if self._match_text_seq("TO"): 4144 self._match_text_seq("SAVEPOINT") 4145 savepoint = self._parse_id_var() 4146 4147 if self._match(TokenType.AND): 4148 chain = not self._match_text_seq("NO") 4149 self._match_text_seq("CHAIN") 4150 4151 if is_rollback: 4152 return self.expression(exp.Rollback, savepoint=savepoint) 4153 return self.expression(exp.Commit, chain=chain) 4154 4155 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4156 if not self._match_text_seq("ADD"): 4157 return None 4158 4159 self._match(TokenType.COLUMN) 4160 exists_column = self._parse_exists(not_=True) 4161 expression = self._parse_column_def(self._parse_field(any_token=True)) 4162 4163 if expression: 4164 expression.set("exists", exists_column) 4165 4166 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
4167 if self._match_texts(("FIRST", "AFTER")): 4168 position = self._prev.text 4169 column_position = self.expression( 4170 exp.ColumnPosition, this=self._parse_column(), position=position 4171 ) 4172 expression.set("position", column_position) 4173 4174 return expression 4175 4176 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4177 drop = self._match(TokenType.DROP) and self._parse_drop() 4178 if drop and not isinstance(drop, exp.Command): 4179 drop.set("kind", drop.args.get("kind", "COLUMN")) 4180 return drop 4181 4182 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4183 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4184 return self.expression( 4185 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4186 ) 4187 4188 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4189 this = None 4190 kind = self._prev.token_type 4191 4192 if kind == TokenType.CONSTRAINT: 4193 this = self._parse_id_var() 4194 4195 if self._match_text_seq("CHECK"): 4196 expression = self._parse_wrapped(self._parse_conjunction) 4197 enforced = self._match_text_seq("ENFORCED") 4198 4199 return self.expression( 4200 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4201 ) 4202 4203 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4204 expression = self._parse_foreign_key() 4205 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4206 expression = self._parse_primary_key() 4207 else: 4208 expression = None 4209 4210 return self.expression(exp.AddConstraint, this=this, expression=expression) 4211 4212 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4213 index = self._index - 1 4214 4215 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4216 return self._parse_csv(self._parse_add_constraint) 4217 4218 self._retreat(index) 4219 return self._parse_csv(self._parse_add_column) 4220 4221 def 
_parse_alter_table_alter(self) -> exp.Expression: 4222 self._match(TokenType.COLUMN) 4223 column = self._parse_field(any_token=True) 4224 4225 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4226 return self.expression(exp.AlterColumn, this=column, drop=True) 4227 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4228 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4229 4230 self._match_text_seq("SET", "DATA") 4231 return self.expression( 4232 exp.AlterColumn, 4233 this=column, 4234 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4235 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4236 using=self._match(TokenType.USING) and self._parse_conjunction(), 4237 ) 4238 4239 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4240 index = self._index - 1 4241 4242 partition_exists = self._parse_exists() 4243 if self._match(TokenType.PARTITION, advance=False): 4244 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4245 4246 self._retreat(index) 4247 return self._parse_csv(self._parse_drop_column) 4248 4249 def _parse_alter_table_rename(self) -> exp.Expression: 4250 self._match_text_seq("TO") 4251 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4252 4253 def _parse_alter(self) -> t.Optional[exp.Expression]: 4254 start = self._prev 4255 4256 if not self._match(TokenType.TABLE): 4257 return self._parse_as_command(start) 4258 4259 exists = self._parse_exists() 4260 this = self._parse_table(schema=True) 4261 4262 if self._next: 4263 self._advance() 4264 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4265 4266 if parser: 4267 actions = ensure_list(parser(self)) 4268 4269 if not self._curr: 4270 return self.expression( 4271 exp.AlterTable, 4272 this=this, 4273 exists=exists, 4274 actions=actions, 4275 ) 4276 return self._parse_as_command(start) 4277 4278 def _parse_merge(self) -> 
exp.Expression: 4279 self._match(TokenType.INTO) 4280 target = self._parse_table() 4281 4282 self._match(TokenType.USING) 4283 using = self._parse_table() 4284 4285 self._match(TokenType.ON) 4286 on = self._parse_conjunction() 4287 4288 whens = [] 4289 while self._match(TokenType.WHEN): 4290 matched = not self._match(TokenType.NOT) 4291 self._match_text_seq("MATCHED") 4292 source = ( 4293 False 4294 if self._match_text_seq("BY", "TARGET") 4295 else self._match_text_seq("BY", "SOURCE") 4296 ) 4297 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4298 4299 self._match(TokenType.THEN) 4300 4301 if self._match(TokenType.INSERT): 4302 _this = self._parse_star() 4303 if _this: 4304 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4305 else: 4306 then = self.expression( 4307 exp.Insert, 4308 this=self._parse_value(), 4309 expression=self._match(TokenType.VALUES) and self._parse_value(), 4310 ) 4311 elif self._match(TokenType.UPDATE): 4312 expressions = self._parse_star() 4313 if expressions: 4314 then = self.expression(exp.Update, expressions=expressions) 4315 else: 4316 then = self.expression( 4317 exp.Update, 4318 expressions=self._match(TokenType.SET) 4319 and self._parse_csv(self._parse_equality), 4320 ) 4321 elif self._match(TokenType.DELETE): 4322 then = self.expression(exp.Var, this=self._prev.text) 4323 else: 4324 then = None 4325 4326 whens.append( 4327 self.expression( 4328 exp.When, 4329 matched=matched, 4330 source=source, 4331 condition=condition, 4332 then=then, 4333 ) 4334 ) 4335 4336 return self.expression( 4337 exp.Merge, 4338 this=target, 4339 using=using, 4340 on=on, 4341 expressions=whens, 4342 ) 4343 4344 def _parse_show(self) -> t.Optional[exp.Expression]: 4345 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4346 if parser: 4347 return parser(self) 4348 self._advance() 4349 return self.expression(exp.Show, this=self._prev.text.upper()) 4350 4351 def 
_parse_set_item_assignment( 4352 self, kind: t.Optional[str] = None 4353 ) -> t.Optional[exp.Expression]: 4354 index = self._index 4355 4356 if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"): 4357 return self._parse_set_transaction(global_=kind == "GLOBAL") 4358 4359 left = self._parse_primary() or self._parse_id_var() 4360 4361 if not self._match_texts(("=", "TO")): 4362 self._retreat(index) 4363 return None 4364 4365 right = self._parse_statement() or self._parse_id_var() 4366 this = self.expression( 4367 exp.EQ, 4368 this=left, 4369 expression=right, 4370 ) 4371 4372 return self.expression( 4373 exp.SetItem, 4374 this=this, 4375 kind=kind, 4376 ) 4377 4378 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 4379 self._match_text_seq("TRANSACTION") 4380 characteristics = self._parse_csv( 4381 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 4382 ) 4383 return self.expression( 4384 exp.SetItem, 4385 expressions=characteristics, 4386 kind="TRANSACTION", 4387 **{"global": global_}, # type: ignore 4388 ) 4389 4390 def _parse_set_item(self) -> t.Optional[exp.Expression]: 4391 parser = self._find_parser(self.SET_PARSERS, self._set_trie) # type: ignore 4392 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 4393 4394 def _parse_set(self) -> exp.Expression: 4395 index = self._index 4396 set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item)) 4397 4398 if self._curr: 4399 self._retreat(index) 4400 return self._parse_as_command(self._prev) 4401 4402 return set_ 4403 4404 def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]: 4405 for option in options: 4406 if self._match_text_seq(*option.split(" ")): 4407 return exp.Var(this=option) 4408 return None 4409 4410 def _parse_as_command(self, start: Token) -> exp.Command: 4411 while self._curr: 4412 self._advance() 4413 text = self._find_sql(start, self._prev) 4414 size 
= len(start.text) 4415 return exp.Command(this=text[:size], expression=text[size:]) 4416 4417 def _find_parser( 4418 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 4419 ) -> t.Optional[t.Callable]: 4420 if not self._curr: 4421 return None 4422 4423 index = self._index 4424 this = [] 4425 while True: 4426 # The current token might be multiple words 4427 curr = self._curr.text.upper() 4428 key = curr.split(" ") 4429 this.append(curr) 4430 self._advance() 4431 result, trie = in_trie(trie, key) 4432 if result == 0: 4433 break 4434 if result == 2: 4435 subparser = parsers[" ".join(this)] 4436 return subparser 4437 self._retreat(index) 4438 return None 4439 4440 def _match(self, token_type, advance=True, expression=None): 4441 if not self._curr: 4442 return None 4443 4444 if self._curr.token_type == token_type: 4445 if advance: 4446 self._advance() 4447 self._add_comments(expression) 4448 return True 4449 4450 return None 4451 4452 def _match_set(self, types, advance=True): 4453 if not self._curr: 4454 return None 4455 4456 if self._curr.token_type in types: 4457 if advance: 4458 self._advance() 4459 return True 4460 4461 return None 4462 4463 def _match_pair(self, token_type_a, token_type_b, advance=True): 4464 if not self._curr or not self._next: 4465 return None 4466 4467 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 4468 if advance: 4469 self._advance(2) 4470 return True 4471 4472 return None 4473 4474 def _match_l_paren(self, expression=None): 4475 if not self._match(TokenType.L_PAREN, expression=expression): 4476 self.raise_error("Expecting (") 4477 4478 def _match_r_paren(self, expression=None): 4479 if not self._match(TokenType.R_PAREN, expression=expression): 4480 self.raise_error("Expecting )") 4481 4482 def _match_texts(self, texts, advance=True): 4483 if self._curr and self._curr.text.upper() in texts: 4484 if advance: 4485 self._advance() 4486 return True 4487 return False 4488 4489 def _match_text_seq(self, 
*texts, advance=True): 4490 index = self._index 4491 for text in texts: 4492 if self._curr and self._curr.text.upper() == text: 4493 self._advance() 4494 else: 4495 self._retreat(index) 4496 return False 4497 4498 if not advance: 4499 self._retreat(index) 4500 4501 return True 4502 4503 def _replace_columns_with_dots(self, this): 4504 if isinstance(this, exp.Dot): 4505 exp.replace_children(this, self._replace_columns_with_dots) 4506 elif isinstance(this, exp.Column): 4507 exp.replace_children(this, self._replace_columns_with_dots) 4508 table = this.args.get("table") 4509 this = ( 4510 self.expression(exp.Dot, this=table, expression=this.this) 4511 if table 4512 else self.expression(exp.Var, this=this.name) 4513 ) 4514 elif isinstance(this, exp.Identifier): 4515 this = self.expression(exp.Var, this=this.name) 4516 return this 4517 4518 def _replace_lambda(self, node, lambda_variables): 4519 for column in node.find_all(exp.Column): 4520 if column.parts[0].name in lambda_variables: 4521 dot_or_id = column.to_dot() if column.table else column.this 4522 parent = column.parent 4523 4524 while isinstance(parent, exp.Dot): 4525 if not isinstance(parent.parent, exp.Dot): 4526 parent.replace(dot_or_id) 4527 break 4528 parent = parent.parent 4529 else: 4530 if column is node: 4531 node = dot_or_id 4532 else: 4533 column.replace(dot_or_id) 4534 return node
def parse_var_map(args: t.Sequence) -> exp.Expression:
    """Build a VAR_MAP expression from an alternating key/value argument list.

    A single star argument produces a ``StarMap``. Otherwise, even-indexed
    arguments are treated as keys and the following odd-indexed arguments as
    their values (an odd-length list raises ``IndexError``, matching the
    original behavior).
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(
        keys=exp.Array(expressions=keys),
        values=exp.Array(expressions=values),
    )
56class Parser(metaclass=_Parser): 57 """ 58 Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces 59 a parsed syntax tree. 60 61 Args: 62 error_level: the desired error level. 63 Default: ErrorLevel.RAISE 64 error_message_context: determines the amount of context to capture from a 65 query string when displaying the error message (in number of characters). 66 Default: 50. 67 index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. 68 Default: 0 69 alias_post_tablesample: If the table alias comes after tablesample. 70 Default: False 71 max_errors: Maximum number of error messages to include in a raised ParseError. 72 This is only relevant if error_level is ErrorLevel.RAISE. 73 Default: 3 74 null_ordering: Indicates the default null ordering method to use if not explicitly set. 75 Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". 76 Default: "nulls_are_small" 77 """ 78 79 FUNCTIONS: t.Dict[str, t.Callable] = { 80 **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()}, 81 "DATE_TO_DATE_STR": lambda args: exp.Cast( 82 this=seq_get(args, 0), 83 to=exp.DataType(this=exp.DataType.Type.TEXT), 84 ), 85 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 86 "IFNULL": exp.Coalesce.from_arg_list, 87 "LIKE": parse_like, 88 "TIME_TO_TIME_STR": lambda args: exp.Cast( 89 this=seq_get(args, 0), 90 to=exp.DataType(this=exp.DataType.Type.TEXT), 91 ), 92 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 93 this=exp.Cast( 94 this=seq_get(args, 0), 95 to=exp.DataType(this=exp.DataType.Type.TEXT), 96 ), 97 start=exp.Literal.number(1), 98 length=exp.Literal.number(10), 99 ), 100 "VAR_MAP": parse_var_map, 101 } 102 103 NO_PAREN_FUNCTIONS = { 104 TokenType.CURRENT_DATE: exp.CurrentDate, 105 TokenType.CURRENT_DATETIME: exp.CurrentDate, 106 TokenType.CURRENT_TIME: exp.CurrentTime, 107 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 108 
TokenType.CURRENT_USER: exp.CurrentUser, 109 } 110 111 JOIN_HINTS: t.Set[str] = set() 112 113 NESTED_TYPE_TOKENS = { 114 TokenType.ARRAY, 115 TokenType.MAP, 116 TokenType.NULLABLE, 117 TokenType.STRUCT, 118 } 119 120 TYPE_TOKENS = { 121 TokenType.BIT, 122 TokenType.BOOLEAN, 123 TokenType.TINYINT, 124 TokenType.UTINYINT, 125 TokenType.SMALLINT, 126 TokenType.USMALLINT, 127 TokenType.INT, 128 TokenType.UINT, 129 TokenType.BIGINT, 130 TokenType.UBIGINT, 131 TokenType.INT128, 132 TokenType.UINT128, 133 TokenType.INT256, 134 TokenType.UINT256, 135 TokenType.FLOAT, 136 TokenType.DOUBLE, 137 TokenType.CHAR, 138 TokenType.NCHAR, 139 TokenType.VARCHAR, 140 TokenType.NVARCHAR, 141 TokenType.TEXT, 142 TokenType.MEDIUMTEXT, 143 TokenType.LONGTEXT, 144 TokenType.MEDIUMBLOB, 145 TokenType.LONGBLOB, 146 TokenType.BINARY, 147 TokenType.VARBINARY, 148 TokenType.JSON, 149 TokenType.JSONB, 150 TokenType.INTERVAL, 151 TokenType.TIME, 152 TokenType.TIMESTAMP, 153 TokenType.TIMESTAMPTZ, 154 TokenType.TIMESTAMPLTZ, 155 TokenType.DATETIME, 156 TokenType.DATETIME64, 157 TokenType.DATE, 158 TokenType.DECIMAL, 159 TokenType.BIGDECIMAL, 160 TokenType.UUID, 161 TokenType.GEOGRAPHY, 162 TokenType.GEOMETRY, 163 TokenType.HLLSKETCH, 164 TokenType.HSTORE, 165 TokenType.PSEUDO_TYPE, 166 TokenType.SUPER, 167 TokenType.SERIAL, 168 TokenType.SMALLSERIAL, 169 TokenType.BIGSERIAL, 170 TokenType.XML, 171 TokenType.UNIQUEIDENTIFIER, 172 TokenType.MONEY, 173 TokenType.SMALLMONEY, 174 TokenType.ROWVERSION, 175 TokenType.IMAGE, 176 TokenType.VARIANT, 177 TokenType.OBJECT, 178 TokenType.INET, 179 *NESTED_TYPE_TOKENS, 180 } 181 182 SUBQUERY_PREDICATES = { 183 TokenType.ANY: exp.Any, 184 TokenType.ALL: exp.All, 185 TokenType.EXISTS: exp.Exists, 186 TokenType.SOME: exp.Any, 187 } 188 189 RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT} 190 191 DB_CREATABLES = { 192 TokenType.DATABASE, 193 TokenType.SCHEMA, 194 TokenType.TABLE, 195 TokenType.VIEW, 196 } 197 198 CREATABLES = { 199 
TokenType.COLUMN, 200 TokenType.FUNCTION, 201 TokenType.INDEX, 202 TokenType.PROCEDURE, 203 *DB_CREATABLES, 204 } 205 206 ID_VAR_TOKENS = { 207 TokenType.VAR, 208 TokenType.ANTI, 209 TokenType.APPLY, 210 TokenType.AUTO_INCREMENT, 211 TokenType.BEGIN, 212 TokenType.BOTH, 213 TokenType.BUCKET, 214 TokenType.CACHE, 215 TokenType.CASCADE, 216 TokenType.COLLATE, 217 TokenType.COMMAND, 218 TokenType.COMMENT, 219 TokenType.COMMIT, 220 TokenType.COMPOUND, 221 TokenType.CONSTRAINT, 222 TokenType.DEFAULT, 223 TokenType.DELETE, 224 TokenType.DESCRIBE, 225 TokenType.DIV, 226 TokenType.END, 227 TokenType.EXECUTE, 228 TokenType.ESCAPE, 229 TokenType.FALSE, 230 TokenType.FIRST, 231 TokenType.FILTER, 232 TokenType.FOLLOWING, 233 TokenType.FORMAT, 234 TokenType.FULL, 235 TokenType.IF, 236 TokenType.IS, 237 TokenType.ISNULL, 238 TokenType.INTERVAL, 239 TokenType.KEEP, 240 TokenType.LAZY, 241 TokenType.LEADING, 242 TokenType.LEFT, 243 TokenType.LOCAL, 244 TokenType.MATERIALIZED, 245 TokenType.MERGE, 246 TokenType.NATURAL, 247 TokenType.NEXT, 248 TokenType.OFFSET, 249 TokenType.ONLY, 250 TokenType.OPTIONS, 251 TokenType.ORDINALITY, 252 TokenType.OVERWRITE, 253 TokenType.PARTITION, 254 TokenType.PERCENT, 255 TokenType.PIVOT, 256 TokenType.PRAGMA, 257 TokenType.PRECEDING, 258 TokenType.RANGE, 259 TokenType.REFERENCES, 260 TokenType.RIGHT, 261 TokenType.ROW, 262 TokenType.ROWS, 263 TokenType.SEED, 264 TokenType.SEMI, 265 TokenType.SET, 266 TokenType.SETTINGS, 267 TokenType.SHOW, 268 TokenType.SORTKEY, 269 TokenType.TEMPORARY, 270 TokenType.TOP, 271 TokenType.TRAILING, 272 TokenType.TRUE, 273 TokenType.UNBOUNDED, 274 TokenType.UNIQUE, 275 TokenType.UNLOGGED, 276 TokenType.UNPIVOT, 277 TokenType.VOLATILE, 278 TokenType.WINDOW, 279 *CREATABLES, 280 *SUBQUERY_PREDICATES, 281 *TYPE_TOKENS, 282 *NO_PAREN_FUNCTIONS, 283 } 284 285 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 286 287 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 288 TokenType.APPLY, 289 TokenType.FULL, 290 TokenType.LEFT, 291 
TokenType.NATURAL, 292 TokenType.OFFSET, 293 TokenType.RIGHT, 294 TokenType.WINDOW, 295 } 296 297 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 298 299 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 300 301 TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH} 302 303 FUNC_TOKENS = { 304 TokenType.COMMAND, 305 TokenType.CURRENT_DATE, 306 TokenType.CURRENT_DATETIME, 307 TokenType.CURRENT_TIMESTAMP, 308 TokenType.CURRENT_TIME, 309 TokenType.CURRENT_USER, 310 TokenType.FILTER, 311 TokenType.FIRST, 312 TokenType.FORMAT, 313 TokenType.GLOB, 314 TokenType.IDENTIFIER, 315 TokenType.INDEX, 316 TokenType.ISNULL, 317 TokenType.ILIKE, 318 TokenType.LIKE, 319 TokenType.MERGE, 320 TokenType.OFFSET, 321 TokenType.PRIMARY_KEY, 322 TokenType.RANGE, 323 TokenType.REPLACE, 324 TokenType.ROW, 325 TokenType.UNNEST, 326 TokenType.VAR, 327 TokenType.LEFT, 328 TokenType.RIGHT, 329 TokenType.DATE, 330 TokenType.DATETIME, 331 TokenType.TABLE, 332 TokenType.TIMESTAMP, 333 TokenType.TIMESTAMPTZ, 334 TokenType.WINDOW, 335 *TYPE_TOKENS, 336 *SUBQUERY_PREDICATES, 337 } 338 339 CONJUNCTION = { 340 TokenType.AND: exp.And, 341 TokenType.OR: exp.Or, 342 } 343 344 EQUALITY = { 345 TokenType.EQ: exp.EQ, 346 TokenType.NEQ: exp.NEQ, 347 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 348 } 349 350 COMPARISON = { 351 TokenType.GT: exp.GT, 352 TokenType.GTE: exp.GTE, 353 TokenType.LT: exp.LT, 354 TokenType.LTE: exp.LTE, 355 } 356 357 BITWISE = { 358 TokenType.AMP: exp.BitwiseAnd, 359 TokenType.CARET: exp.BitwiseXor, 360 TokenType.PIPE: exp.BitwiseOr, 361 TokenType.DPIPE: exp.DPipe, 362 } 363 364 TERM = { 365 TokenType.DASH: exp.Sub, 366 TokenType.PLUS: exp.Add, 367 TokenType.MOD: exp.Mod, 368 TokenType.COLLATE: exp.Collate, 369 } 370 371 FACTOR = { 372 TokenType.DIV: exp.IntDiv, 373 TokenType.LR_ARROW: exp.Distance, 374 TokenType.SLASH: exp.Div, 375 TokenType.STAR: exp.Mul, 376 } 377 378 TIMESTAMPS = { 379 TokenType.TIME, 380 TokenType.TIMESTAMP, 381 
TokenType.TIMESTAMPTZ, 382 TokenType.TIMESTAMPLTZ, 383 } 384 385 SET_OPERATIONS = { 386 TokenType.UNION, 387 TokenType.INTERSECT, 388 TokenType.EXCEPT, 389 } 390 391 JOIN_SIDES = { 392 TokenType.LEFT, 393 TokenType.RIGHT, 394 TokenType.FULL, 395 } 396 397 JOIN_KINDS = { 398 TokenType.INNER, 399 TokenType.OUTER, 400 TokenType.CROSS, 401 TokenType.SEMI, 402 TokenType.ANTI, 403 } 404 405 LAMBDAS = { 406 TokenType.ARROW: lambda self, expressions: self.expression( 407 exp.Lambda, 408 this=self._replace_lambda( 409 self._parse_conjunction(), 410 {node.name for node in expressions}, 411 ), 412 expressions=expressions, 413 ), 414 TokenType.FARROW: lambda self, expressions: self.expression( 415 exp.Kwarg, 416 this=exp.Var(this=expressions[0].name), 417 expression=self._parse_conjunction(), 418 ), 419 } 420 421 COLUMN_OPERATORS = { 422 TokenType.DOT: None, 423 TokenType.DCOLON: lambda self, this, to: self.expression( 424 exp.Cast if self.STRICT_CAST else exp.TryCast, 425 this=this, 426 to=to, 427 ), 428 TokenType.ARROW: lambda self, this, path: self.expression( 429 exp.JSONExtract, 430 this=this, 431 expression=path, 432 ), 433 TokenType.DARROW: lambda self, this, path: self.expression( 434 exp.JSONExtractScalar, 435 this=this, 436 expression=path, 437 ), 438 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 439 exp.JSONBExtract, 440 this=this, 441 expression=path, 442 ), 443 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 444 exp.JSONBExtractScalar, 445 this=this, 446 expression=path, 447 ), 448 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 449 exp.JSONBContains, 450 this=this, 451 expression=key, 452 ), 453 } 454 455 EXPRESSION_PARSERS = { 456 exp.Column: lambda self: self._parse_column(), 457 exp.DataType: lambda self: self._parse_types(), 458 exp.From: lambda self: self._parse_from(), 459 exp.Group: lambda self: self._parse_group(), 460 exp.Identifier: lambda self: self._parse_id_var(), 461 exp.Lateral: lambda self: 
self._parse_lateral(), 462 exp.Join: lambda self: self._parse_join(), 463 exp.Order: lambda self: self._parse_order(), 464 exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster), 465 exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort), 466 exp.Lambda: lambda self: self._parse_lambda(), 467 exp.Limit: lambda self: self._parse_limit(), 468 exp.Offset: lambda self: self._parse_offset(), 469 exp.TableAlias: lambda self: self._parse_table_alias(), 470 exp.Table: lambda self: self._parse_table(), 471 exp.Condition: lambda self: self._parse_conjunction(), 472 exp.Expression: lambda self: self._parse_statement(), 473 exp.Properties: lambda self: self._parse_properties(), 474 exp.Where: lambda self: self._parse_where(), 475 exp.Ordered: lambda self: self._parse_ordered(), 476 exp.Having: lambda self: self._parse_having(), 477 exp.With: lambda self: self._parse_with(), 478 exp.Window: lambda self: self._parse_named_window(), 479 exp.Qualify: lambda self: self._parse_qualify(), 480 exp.Returning: lambda self: self._parse_returning(), 481 "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(), 482 } 483 484 STATEMENT_PARSERS = { 485 TokenType.ALTER: lambda self: self._parse_alter(), 486 TokenType.BEGIN: lambda self: self._parse_transaction(), 487 TokenType.CACHE: lambda self: self._parse_cache(), 488 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 489 TokenType.COMMENT: lambda self: self._parse_comment(), 490 TokenType.CREATE: lambda self: self._parse_create(), 491 TokenType.DELETE: lambda self: self._parse_delete(), 492 TokenType.DESC: lambda self: self._parse_describe(), 493 TokenType.DESCRIBE: lambda self: self._parse_describe(), 494 TokenType.DROP: lambda self: self._parse_drop(), 495 TokenType.END: lambda self: self._parse_commit_or_rollback(), 496 TokenType.INSERT: lambda self: self._parse_insert(), 497 TokenType.LOAD_DATA: lambda self: self._parse_load_data(), 498 TokenType.MERGE: lambda self: 
self._parse_merge(), 499 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 500 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 501 TokenType.SET: lambda self: self._parse_set(), 502 TokenType.UNCACHE: lambda self: self._parse_uncache(), 503 TokenType.UPDATE: lambda self: self._parse_update(), 504 TokenType.USE: lambda self: self.expression( 505 exp.Use, 506 kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA")) 507 and exp.Var(this=self._prev.text), 508 this=self._parse_table(schema=False), 509 ), 510 } 511 512 UNARY_PARSERS = { 513 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 514 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 515 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 516 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 517 } 518 519 PRIMARY_PARSERS = { 520 TokenType.STRING: lambda self, token: self.expression( 521 exp.Literal, this=token.text, is_string=True 522 ), 523 TokenType.NUMBER: lambda self, token: self.expression( 524 exp.Literal, this=token.text, is_string=False 525 ), 526 TokenType.STAR: lambda self, _: self.expression( 527 exp.Star, 528 **{"except": self._parse_except(), "replace": self._parse_replace()}, 529 ), 530 TokenType.NULL: lambda self, _: self.expression(exp.Null), 531 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 532 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 533 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 534 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 535 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 536 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 537 TokenType.NATIONAL: lambda self, token: 
self._parse_national(token), 538 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 539 } 540 541 PLACEHOLDER_PARSERS = { 542 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 543 TokenType.PARAMETER: lambda self: self._parse_parameter(), 544 TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text) 545 if self._match_set((TokenType.NUMBER, TokenType.VAR)) 546 else None, 547 } 548 549 RANGE_PARSERS = { 550 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 551 TokenType.GLOB: binary_range_parser(exp.Glob), 552 TokenType.ILIKE: binary_range_parser(exp.ILike), 553 TokenType.IN: lambda self, this: self._parse_in(this), 554 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 555 TokenType.IS: lambda self, this: self._parse_is(this), 556 TokenType.LIKE: binary_range_parser(exp.Like), 557 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 558 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 559 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 560 } 561 562 PROPERTY_PARSERS = { 563 "AFTER": lambda self: self._parse_afterjournal( 564 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 565 ), 566 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 567 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 568 "BEFORE": lambda self: self._parse_journal( 569 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 570 ), 571 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 572 "CHARACTER SET": lambda self: self._parse_character_set(), 573 "CHECKSUM": lambda self: self._parse_checksum(), 574 "CLUSTER BY": lambda self: self.expression( 575 exp.Cluster, expressions=self._parse_csv(self._parse_ordered) 576 ), 577 "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty), 578 "COMMENT": lambda self: 
self._parse_property_assignment(exp.SchemaCommentProperty), 579 "DATABLOCKSIZE": lambda self: self._parse_datablocksize( 580 default=self._prev.text.upper() == "DEFAULT" 581 ), 582 "DEFINER": lambda self: self._parse_definer(), 583 "DETERMINISTIC": lambda self: self.expression( 584 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 585 ), 586 "DISTKEY": lambda self: self._parse_distkey(), 587 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 588 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 589 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 590 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 591 "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"), 592 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 593 "FREESPACE": lambda self: self._parse_freespace(), 594 "GLOBAL": lambda self: self._parse_temporary(global_=True), 595 "IMMUTABLE": lambda self: self.expression( 596 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 597 ), 598 "JOURNAL": lambda self: self._parse_journal( 599 no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL" 600 ), 601 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 602 "LIKE": lambda self: self._parse_create_like(), 603 "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True), 604 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 605 "LOCK": lambda self: self._parse_locking(), 606 "LOCKING": lambda self: self._parse_locking(), 607 "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"), 608 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 609 "MAX": lambda self: self._parse_datablocksize(), 610 "MAXIMUM": lambda self: self._parse_datablocksize(), 611 "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio( 612 
no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT" 613 ), 614 "MIN": lambda self: self._parse_datablocksize(), 615 "MINIMUM": lambda self: self._parse_datablocksize(), 616 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 617 "NO": lambda self: self._parse_noprimaryindex(), 618 "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False), 619 "ON": lambda self: self._parse_oncommit(), 620 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 621 "PARTITION BY": lambda self: self._parse_partitioned_by(), 622 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 623 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 624 "PRIMARY KEY": lambda self: self._parse_primary_key(), 625 "RETURNS": lambda self: self._parse_returns(), 626 "ROW": lambda self: self._parse_row(), 627 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 628 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 629 "SETTINGS": lambda self: self.expression( 630 exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item) 631 ), 632 "SORTKEY": lambda self: self._parse_sortkey(), 633 "STABLE": lambda self: self.expression( 634 exp.StabilityProperty, this=exp.Literal.string("STABLE") 635 ), 636 "STORED": lambda self: self._parse_stored(), 637 "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 638 "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property), 639 "TEMP": lambda self: self._parse_temporary(global_=False), 640 "TEMPORARY": lambda self: self._parse_temporary(global_=False), 641 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 642 "TTL": lambda self: self._parse_ttl(), 643 "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty), 644 "VOLATILE": lambda self: self._parse_volatile_property(), 645 "WITH": lambda self: self._parse_with_property(), 646 } 647 
    # Column/table-constraint dispatch table: maps the uppercased keyword that
    # introduces a constraint to a callable parsing the remainder of it.
    # NOTE(review): many entries mutate parser state while matching (e.g. "ON"
    # consumes an UPDATE token via self._match), so entries are not pure.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint, this=self._parse_var()
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <function>; yields falsy (no constraint) unless UPDATE follows.
        "ON": lambda self: self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    }

    # ALTER TABLE <action> dispatch table, keyed by the action keyword.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
    }

    # Constraint keywords that may appear in a schema without a column name.
    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}

    # Function-like constructs parsed without a parenthesized argument list
    # (e.g. CASE, IF, NEXT VALUE FOR), keyed by token type.
    NO_PAREN_FUNCTION_PARSERS = {
        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        TokenType.CASE: lambda self: self._parse_case(),
        TokenType.IF: lambda self: self._parse_if(),
        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        ),
    }

    # Functions whose argument lists need bespoke parsing (special syntax such
    # as CAST(x AS type) or TRIM(... FROM ...)), keyed by function name.
    # CAST honors STRICT_CAST; TRY_CAST/TRY_CONVERT always parse as non-strict.
    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "LOG": lambda self: self._parse_logarithm(),
        "MATCH": lambda self: self._parse_match_against(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "POSITION": lambda self: self._parse_position(),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False),
        "TRY_CONVERT": lambda self: self._parse_convert(False),
    }
    # Parsers for the optional clauses that can trail a query, keyed by the
    # arg name they populate on the parsed expression. "joins"/"laterals"
    # repeatedly invoke their parser until it returns None (via iter sentinel).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET <scope/kind> handlers; keys also feed the _set_trie built by _Parser.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW handlers; empty in the base parser (dialects populate it), but the
    # keys are still used by _Parser to build _show_trie.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Per-dialect parsers for literals of specific data types; empty by default.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types onto which query modifiers may be attached.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Transaction kinds recognized after BEGIN/START, e.g. BEGIN IMMEDIATE.
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Textual transaction characteristics accepted by SET TRANSACTION.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords allowed in INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Keywords accepted inside a CLONE ... (<kind> => expr) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Tokens usable as a window alias; TokenType.ROWS is deliberately excluded.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    # Tokens that may introduce a constraint in ALTER TABLE ... ADD.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Passed to _parse_cast for CAST/CONVERT (TRY_ variants always pass False).
    STRICT_CAST = True

    # Whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    # Dialect toggle for pivot-column naming — overridden by subclasses.
    PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False 785 786 LOG_BASE_FIRST = True 787 LOG_DEFAULTS_TO_LN = False 788 789 __slots__ = ( 790 "error_level", 791 "error_message_context", 792 "sql", 793 "errors", 794 "index_offset", 795 "unnest_column_only", 796 "alias_post_tablesample", 797 "max_errors", 798 "null_ordering", 799 "_tokens", 800 "_index", 801 "_curr", 802 "_next", 803 "_prev", 804 "_prev_comments", 805 "_show_trie", 806 "_set_trie", 807 ) 808 809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset() 827 828 def reset(self): 829 self.sql = "" 830 self.errors = [] 831 self._tokens = [] 832 self._index = 0 833 self._curr = None 834 self._next = None 835 self._prev = None 836 self._prev_comments = None 837 838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 
851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 ) 855 856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1] 889 890 def _parse( 891 self, 892 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 893 raw_tokens: t.List[Token], 894 sql: t.Optional[str] = None, 895 ) -> t.List[t.Optional[exp.Expression]]: 896 self.reset() 897 self.sql = sql or "" 898 total = len(raw_tokens) 899 chunks: t.List[t.List[Token]] = [[]] 900 901 for i, token in enumerate(raw_tokens): 902 if token.token_type == TokenType.SEMICOLON: 903 if i < total - 1: 904 chunks.append([]) 905 else: 906 chunks[-1].append(token) 907 908 expressions = [] 909 910 for tokens in chunks: 911 self._index = -1 912 self._tokens = tokens 913 self._advance() 914 915 expressions.append(parse_method(self)) 916 917 if 
self._index < len(self._tokens): 918 self.raise_error("Invalid expression / Unexpected token") 919 920 self.check_errors() 921 922 return expressions 923 924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 ) 936 937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error) 964 965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 
978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance 983 984 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 985 if expression and self._prev_comments: 986 expression.add_comments(self._prev_comments) 987 self._prev_comments = None 988 989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message) 1005 1006 def _find_sql(self, start: Token, end: Token) -> str: 1007 return self.sql[start.start : end.end] 1008 1009 def _advance(self, times: int = 1) -> None: 1010 self._index += times 1011 self._curr = seq_get(self._tokens, self._index) 1012 self._next = seq_get(self._tokens, self._index + 1) 1013 if self._index > 0: 1014 self._prev = self._tokens[self._index - 1] 1015 self._prev_comments = self._prev.comments 1016 else: 1017 self._prev = None 1018 self._prev_comments = None 1019 1020 def _retreat(self, index: int) -> None: 1021 if index != self._index: 1022 self._advance(index - self._index) 1023 1024 def _parse_command(self) -> exp.Command: 1025 return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string()) 1026 1027 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1028 start = self._prev 1029 exists = self._parse_exists() if allow_exists else None 1030 1031 self._match(TokenType.ON) 1032 1033 kind = self._match_set(self.CREATABLES) and self._prev 1034 1035 if not kind: 1036 return 
self._parse_as_command(start) 1037 1038 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1039 this = self._parse_user_defined_function(kind=kind.token_type) 1040 elif kind.token_type == TokenType.TABLE: 1041 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1042 elif kind.token_type == TokenType.COLUMN: 1043 this = self._parse_column() 1044 else: 1045 this = self._parse_id_var() 1046 1047 self._match(TokenType.IS) 1048 1049 return self.expression( 1050 exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists 1051 ) 1052 1053 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1054 def _parse_ttl(self) -> exp.Expression: 1055 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1056 this = self._parse_bitwise() 1057 1058 if self._match_text_seq("DELETE"): 1059 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1060 if self._match_text_seq("RECOMPRESS"): 1061 return self.expression( 1062 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1063 ) 1064 if self._match_text_seq("TO", "DISK"): 1065 return self.expression( 1066 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1067 ) 1068 if self._match_text_seq("TO", "VOLUME"): 1069 return self.expression( 1070 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1071 ) 1072 1073 return this 1074 1075 expressions = self._parse_csv(_parse_ttl_action) 1076 where = self._parse_where() 1077 group = self._parse_group() 1078 1079 aggregates = None 1080 if group and self._match(TokenType.SET): 1081 aggregates = self._parse_csv(self._parse_set_item) 1082 1083 return self.expression( 1084 exp.MergeTreeTTL, 1085 expressions=expressions, 1086 where=where, 1087 group=group, 1088 aggregates=aggregates, 1089 ) 1090 1091 def _parse_statement(self) -> t.Optional[exp.Expression]: 1092 if self._curr is None: 1093 return None 1094 1095 if 
self._match_set(self.STATEMENT_PARSERS): 1096 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1097 1098 if self._match_set(Tokenizer.COMMANDS): 1099 return self._parse_command() 1100 1101 expression = self._parse_expression() 1102 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1103 return self._parse_query_modifiers(expression) 1104 1105 def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]: 1106 start = self._prev 1107 temporary = self._match(TokenType.TEMPORARY) 1108 materialized = self._match(TokenType.MATERIALIZED) 1109 kind = self._match_set(self.CREATABLES) and self._prev.text 1110 if not kind: 1111 return self._parse_as_command(start) 1112 1113 return self.expression( 1114 exp.Drop, 1115 exists=self._parse_exists(), 1116 this=self._parse_table(schema=True), 1117 kind=kind, 1118 temporary=temporary, 1119 materialized=materialized, 1120 cascade=self._match(TokenType.CASCADE), 1121 constraints=self._match_text_seq("CONSTRAINTS"), 1122 purge=self._match_text_seq("PURGE"), 1123 ) 1124 1125 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1126 return ( 1127 self._match(TokenType.IF) 1128 and (not not_ or self._match(TokenType.NOT)) 1129 and self._match(TokenType.EXISTS) 1130 ) 1131 1132 def _parse_create(self) -> t.Optional[exp.Expression]: 1133 start = self._prev 1134 replace = self._prev.text.upper() == "REPLACE" or self._match_pair( 1135 TokenType.OR, TokenType.REPLACE 1136 ) 1137 unique = self._match(TokenType.UNIQUE) 1138 1139 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1140 self._match(TokenType.TABLE) 1141 1142 properties = None 1143 create_token = self._match_set(self.CREATABLES) and self._prev 1144 1145 if not create_token: 1146 properties = self._parse_properties() # exp.Properties.Location.POST_CREATE 1147 create_token = self._match_set(self.CREATABLES) and self._prev 1148 1149 if not properties or not create_token: 1150 return 
self._parse_as_command(start) 1151 1152 exists = self._parse_exists(not_=True) 1153 this = None 1154 expression = None 1155 indexes = None 1156 no_schema_binding = None 1157 begin = None 1158 clone = None 1159 1160 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1161 this = self._parse_user_defined_function(kind=create_token.token_type) 1162 temp_properties = self._parse_properties() 1163 if properties and temp_properties: 1164 properties.expressions.extend(temp_properties.expressions) 1165 elif temp_properties: 1166 properties = temp_properties 1167 1168 self._match(TokenType.ALIAS) 1169 begin = self._match(TokenType.BEGIN) 1170 return_ = self._match_text_seq("RETURN") 1171 expression = self._parse_statement() 1172 1173 if return_: 1174 expression = self.expression(exp.Return, this=expression) 1175 elif create_token.token_type == TokenType.INDEX: 1176 this = self._parse_index() 1177 elif create_token.token_type in self.DB_CREATABLES: 1178 table_parts = self._parse_table_parts(schema=True) 1179 1180 # exp.Properties.Location.POST_NAME 1181 if self._match(TokenType.COMMA): 1182 temp_properties = self._parse_properties(before=True) 1183 if properties and temp_properties: 1184 properties.expressions.extend(temp_properties.expressions) 1185 elif temp_properties: 1186 properties = temp_properties 1187 1188 this = self._parse_schema(this=table_parts) 1189 1190 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1191 temp_properties = self._parse_properties() 1192 if properties and temp_properties: 1193 properties.expressions.extend(temp_properties.expressions) 1194 elif temp_properties: 1195 properties = temp_properties 1196 1197 self._match(TokenType.ALIAS) 1198 1199 # exp.Properties.Location.POST_ALIAS 1200 if not ( 1201 self._match(TokenType.SELECT, advance=False) 1202 or self._match(TokenType.WITH, advance=False) 1203 or self._match(TokenType.L_PAREN, advance=False) 1204 ): 1205 temp_properties = self._parse_properties() 1206 if properties 
and temp_properties: 1207 properties.expressions.extend(temp_properties.expressions) 1208 elif temp_properties: 1209 properties = temp_properties 1210 1211 expression = self._parse_ddl_select() 1212 1213 if create_token.token_type == TokenType.TABLE: 1214 # exp.Properties.Location.POST_EXPRESSION 1215 temp_properties = self._parse_properties() 1216 if properties and temp_properties: 1217 properties.expressions.extend(temp_properties.expressions) 1218 elif temp_properties: 1219 properties = temp_properties 1220 1221 indexes = [] 1222 while True: 1223 index = self._parse_create_table_index() 1224 1225 # exp.Properties.Location.POST_INDEX 1226 if self._match(TokenType.PARTITION_BY, advance=False): 1227 temp_properties = self._parse_properties() 1228 if properties and temp_properties: 1229 properties.expressions.extend(temp_properties.expressions) 1230 elif temp_properties: 1231 properties = temp_properties 1232 1233 if not index: 1234 break 1235 else: 1236 indexes.append(index) 1237 elif create_token.token_type == TokenType.VIEW: 1238 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1239 no_schema_binding = True 1240 1241 if self._match_text_seq("CLONE"): 1242 clone = self._parse_table(schema=True) 1243 when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper() 1244 clone_kind = ( 1245 self._match(TokenType.L_PAREN) 1246 and self._match_texts(self.CLONE_KINDS) 1247 and self._prev.text.upper() 1248 ) 1249 clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise() 1250 self._match(TokenType.R_PAREN) 1251 clone = self.expression( 1252 exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression 1253 ) 1254 1255 return self.expression( 1256 exp.Create, 1257 this=this, 1258 kind=create_token.text, 1259 replace=replace, 1260 unique=unique, 1261 expression=expression, 1262 exists=exists, 1263 properties=properties, 1264 indexes=indexes, 1265 no_schema_binding=no_schema_binding, 1266 begin=begin, 1267 clone=clone, 1268 
) 1269 1270 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1271 self._match(TokenType.COMMA) 1272 1273 # parsers look to _prev for no/dual/default, so need to consume first 1274 self._match_text_seq("NO") 1275 self._match_text_seq("DUAL") 1276 self._match_text_seq("DEFAULT") 1277 1278 if self.PROPERTY_PARSERS.get(self._curr.text.upper()): 1279 return self.PROPERTY_PARSERS[self._curr.text.upper()](self) 1280 1281 return None 1282 1283 def _parse_property(self) -> t.Optional[exp.Expression]: 1284 if self._match_texts(self.PROPERTY_PARSERS): 1285 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1286 1287 if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET): 1288 return self._parse_character_set(default=True) 1289 1290 if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY): 1291 return self._parse_sortkey(compound=True) 1292 1293 if self._match_text_seq("SQL", "SECURITY"): 1294 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1295 1296 assignment = self._match_pair( 1297 TokenType.VAR, TokenType.EQ, advance=False 1298 ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False) 1299 1300 if assignment: 1301 key = self._parse_var_or_string() 1302 self._match(TokenType.EQ) 1303 return self.expression(exp.Property, this=key, value=self._parse_column()) 1304 1305 return None 1306 1307 def _parse_stored(self) -> exp.Expression: 1308 self._match(TokenType.ALIAS) 1309 1310 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 1311 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 1312 1313 return self.expression( 1314 exp.FileFormatProperty, 1315 this=self.expression( 1316 exp.InputOutputFormat, input_format=input_format, output_format=output_format 1317 ) 1318 if input_format or output_format 1319 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(), 1320 ) 1321 1322 def 
_parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression: 1323 self._match(TokenType.EQ) 1324 self._match(TokenType.ALIAS) 1325 return self.expression(exp_class, this=self._parse_field()) 1326 1327 def _parse_properties(self, before=None) -> t.Optional[exp.Expression]: 1328 properties = [] 1329 1330 while True: 1331 if before: 1332 identified_property = self._parse_property_before() 1333 else: 1334 identified_property = self._parse_property() 1335 1336 if not identified_property: 1337 break 1338 for p in ensure_list(identified_property): 1339 properties.append(p) 1340 1341 if properties: 1342 return self.expression(exp.Properties, expressions=properties) 1343 1344 return None 1345 1346 def _parse_fallback(self, no=False) -> exp.Expression: 1347 self._match_text_seq("FALLBACK") 1348 return self.expression( 1349 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 1350 ) 1351 1352 def _parse_volatile_property(self) -> exp.Expression: 1353 if self._index >= 2: 1354 pre_volatile_token = self._tokens[self._index - 2] 1355 else: 1356 pre_volatile_token = None 1357 1358 if pre_volatile_token and pre_volatile_token.token_type in ( 1359 TokenType.CREATE, 1360 TokenType.REPLACE, 1361 TokenType.UNIQUE, 1362 ): 1363 return exp.VolatileProperty() 1364 1365 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 1366 1367 def _parse_with_property( 1368 self, 1369 ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]: 1370 self._match(TokenType.WITH) 1371 if self._match(TokenType.L_PAREN, advance=False): 1372 return self._parse_wrapped_csv(self._parse_property) 1373 1374 if self._match_text_seq("JOURNAL"): 1375 return self._parse_withjournaltable() 1376 1377 if self._match_text_seq("DATA"): 1378 return self._parse_withdata(no=False) 1379 elif self._match_text_seq("NO", "DATA"): 1380 return self._parse_withdata(no=True) 1381 1382 if not self._next: 1383 return None 1384 1385 return 
self._parse_withisolatedloading() 1386 1387 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 1388 def _parse_definer(self) -> t.Optional[exp.Expression]: 1389 self._match(TokenType.EQ) 1390 1391 user = self._parse_id_var() 1392 self._match(TokenType.PARAMETER) 1393 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 1394 1395 if not user or not host: 1396 return None 1397 1398 return exp.DefinerProperty(this=f"{user}@{host}") 1399 1400 def _parse_withjournaltable(self) -> exp.Expression: 1401 self._match(TokenType.TABLE) 1402 self._match(TokenType.EQ) 1403 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 1404 1405 def _parse_log(self, no=False) -> exp.Expression: 1406 self._match_text_seq("LOG") 1407 return self.expression(exp.LogProperty, no=no) 1408 1409 def _parse_journal(self, no=False, dual=False) -> exp.Expression: 1410 before = self._match_text_seq("BEFORE") 1411 self._match_text_seq("JOURNAL") 1412 return self.expression(exp.JournalProperty, no=no, dual=dual, before=before) 1413 1414 def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression: 1415 self._match_text_seq("NOT") 1416 self._match_text_seq("LOCAL") 1417 self._match_text_seq("AFTER", "JOURNAL") 1418 return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local) 1419 1420 def _parse_checksum(self) -> exp.Expression: 1421 self._match_text_seq("CHECKSUM") 1422 self._match(TokenType.EQ) 1423 1424 on = None 1425 if self._match(TokenType.ON): 1426 on = True 1427 elif self._match_text_seq("OFF"): 1428 on = False 1429 default = self._match(TokenType.DEFAULT) 1430 1431 return self.expression( 1432 exp.ChecksumProperty, 1433 on=on, 1434 default=default, 1435 ) 1436 1437 def _parse_freespace(self) -> exp.Expression: 1438 self._match_text_seq("FREESPACE") 1439 self._match(TokenType.EQ) 1440 return self.expression( 1441 exp.FreespaceProperty, this=self._parse_number(), 
percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
        """Parse MERGEBLOCKRATIO, either with an explicit value
        (= <number> [PERCENT]) or as the bare NO/DEFAULT variant."""
        self._match_text_seq("MERGEBLOCKRATIO")
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )
        else:
            return self.expression(
                exp.MergeBlockRatioProperty,
                no=no,
                default=default,
            )

    def _parse_datablocksize(self, default=None) -> exp.Expression:
        """Parse [DEFAULT | MIN[IMUM] | MAX[IMUM]] DATABLOCKSIZE
        [= <size> [BYTES | KBYTES | KILOBYTES]]."""
        if default:
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, default=True)
        elif self._match_texts(("MIN", "MINIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=True)
        elif self._match_texts(("MAX", "MAXIMUM")):
            self._match_text_seq("DATABLOCKSIZE")
            return self.expression(exp.DataBlocksizeProperty, min=False)

        self._match_text_seq("DATABLOCKSIZE")
        self._match(TokenType.EQ)
        size = self._parse_number()
        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text
        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)

    def _parse_blockcompression(self) -> exp.Expression:
        """Parse BLOCKCOMPRESSION [=] (ALWAYS | MANUAL | NEVER | DEFAULT)
        [AUTOTEMP (<schema>)]."""
        self._match_text_seq("BLOCKCOMPRESSION")
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")
        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> exp.Expression:
        """Parse [NO] [CONCURRENT] ISOLATED LOADING [FOR ALL | FOR INSERT | FOR NONE]."""
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")
        self._match_text_seq("ISOLATED", "LOADING")
        for_all = self._match_text_seq("FOR", "ALL")
        for_insert = self._match_text_seq("FOR", "INSERT")
        for_none = self._match_text_seq("FOR", "NONE")
        return self.expression(
            exp.IsolatedLoadingProperty,
            no=no,
            concurrent=concurrent,
            for_all=for_all,
            for_insert=for_insert,
            for_none=for_none,
        )

    def _parse_locking(self) -> exp.Expression:
        """Parse a LOCKING clause: optional kind (TABLE/VIEW/ROW/DATABASE),
        optional target object, FOR/IN, a lock type, and optional OVERRIDE."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW (or no kind) has no named target; the others lock a specific object.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse PARTITION BY <expr>, ... into a list (empty when absent)."""
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_conjunction)
        return []

    def _parse_partitioned_by(self) -> exp.Expression:
        """Parse PARTITIONED BY [=] (<schema> | <bracketed field>)."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no=False) -> exp.Expression:
        """Parse the tail of WITH [NO] DATA [AND [NO] STATISTICS]."""
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_noprimaryindex(self) -> exp.Expression:
        # Consumes PRIMARY INDEX; the leading NO is presumably consumed by the
        # property dispatcher (not visible here) — confirm against PROPERTY_PARSERS.
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()

    def _parse_oncommit(self) -> exp.Expression:
        # Consumes COMMIT PRESERVE ROWS; the leading ON is presumably consumed
        # by the property dispatcher — confirm against PROPERTY_PARSERS.
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()

    def _parse_distkey(self) -> exp.Expression:
        """Parse a wrapped identifier into a DistKeyProperty."""
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.Expression]:
        """Parse LIKE <table> [{INCLUDING | EXCLUDING} <option>]... for CREATE."""
        table = self._parse_table(schema=True)
        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()
            id_var = self._parse_id_var()

            if not id_var:
                return None

            options.append(
                self.expression(
                    exp.Property,
                    this=this,
                    value=exp.Var(this=id_var.this.upper()),
                )
            )
        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
        """Parse [COMPOUND] SORTKEY(<id>, ...)."""
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.Expression:
        """Parse [DEFAULT] CHARACTER SET [=] <value>."""
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_returns(self) -> exp.Expression:
        """Parse a RETURNS clause: RETURNS TABLE [<struct types>] or a plain type."""
        value: t.Optional[exp.Expression]
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                # RETURNS TABLE <col type, ...> form.
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.Var(this="TABLE"))
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)

    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Build a TemporaryProperty, tolerating an optional TEMPORARY token."""
        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)

    def _parse_describe(self) -> exp.Expression:
        """Parse DESCRIBE [<creatable kind>] <table>."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        this = self._parse_table()

        return self.expression(exp.Describe, this=this, kind=kind)

    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement body, covering both the
        [OVERWRITE] [LOCAL] DIRECTORY form and the [OR <alt>] INTO <table> form,
        including partition, ON CONFLICT and RETURNING clauses."""
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE (see INSERT_ALTERNATIVES).
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... / ON DUPLICATE KEY ... conflict handling."""
        conflict =
self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # ON CONFLICT targets either a named constraint or a key tuple list.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )

    def _parse_returning(self) -> t.Optional[exp.Expression]:
        """Parse a RETURNING <column>, ... clause if present."""
        if not self._match(TokenType.RETURNING):
            return None

        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))

    def _parse_row(self) -> t.Optional[exp.Expression]:
        # Expects FORMAT next; the leading ROW token is presumably consumed by
        # the caller — confirm against the dispatch site.
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse ROW FORMAT SERDE '<serde>' or ROW FORMAT DELIMITED ... .

        Args:
            match_row: when True, require the leading ROW FORMAT token pair.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Collect only the sub-clauses that actually appear.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table>
        [PARTITION ...] [INPUTFORMAT '<fmt>'] [SERDE '<serde>']."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )

    def _parse_delete(self) -> exp.Expression:
        """Parse a DELETE statement body: [FROM] <table> [USING ...] [WHERE ...]
        [RETURNING ...]."""
        self._match(TokenType.FROM)

        return self.expression(
            exp.Delete,
            this=self._parse_table(),
            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
            where=self._parse_where(),
            returning=self._parse_returning(),
        )

    def _parse_update(self) -> exp.Expression:
        """Parse an UPDATE statement body: <table> SET <assignments>
        [FROM ...] [WHERE ...] [RETURNING ...]."""
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )

    def _parse_uncache(self) -> exp.Expression:
        """Parse UNCACHE TABLE [IF EXISTS] <table>."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache,
            exists=self._parse_exists(),
            this=self._parse_table(schema=True),
        )

    def _parse_cache(self) -> exp.Expression:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS] <select>."""
        lazy = self._match(TokenType.LAZY)
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)
        options = []

        if self._match(TokenType.OPTIONS):
            # A single 'key' = 'value' pair is supported.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Expression]:
        """Parse PARTITION (<expr>, ...) if present."""
        if not self._match(TokenType.PARTITION):
            return None

        return self.expression(
            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
        )

    def _parse_value(self) -> exp.Expression:
        """Parse one VALUES row: a parenthesized tuple, or a lone expression."""
        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_conjunction)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
        # Source: https://prestodb.io/docs/current/sql/values.html
        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])

    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query: WITH ..., SELECT ..., a parenthesized
        subquery/table, or VALUES ..., followed by any set operations.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a table reference inside parentheses.
            parse_subquery_alias: whether to parse an alias after a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may not raise under lenient error levels, so
                # fall back to returning just the CTE.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # e.g. SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limits appear before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse WITH [RECURSIVE] <cte> [, <cte>]... into an exp.With node."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs may be separated by commas or by a repeated WITH keyword.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )

    def _parse_cte(self) -> exp.Expression:
        """Parse a single CTE: <alias> AS (<statement>)."""
        alias = self._parse_table_alias()
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        self._match(TokenType.ALIAS)

        return self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
        )

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse [AS] <alias> [(<col>, ...)] into an exp.TableAlias, or None."""
        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # Rewind if the parens turned out not to hold a column list.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> exp.Expression:
        """Wrap `this` in an exp.Subquery, parsing trailing pivots and alias."""
        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
        )

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach query modifiers (joins, where, group, order, ...) to `this`
        via QUERY_MODIFIER_PARSERS, when `this` is a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
                expression = parser(self)

                if expression:
                    this.set(key, expression)
        return this

    def _parse_hint(self) -> t.Optional[exp.Expression]:
        """Parse an optimizer hint comment (terminated by */) into exp.Hint."""
        if self._match(TokenType.HINT):
            hints = self._parse_csv(self._parse_function)
            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
                self.raise_error("Expected */ after HINT")
            return self.expression(exp.Hint, expressions=hints)

        return None

    def _parse_into(self) -> t.Optional[exp.Expression]:
        """Parse INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match(TokenType.UNLOGGED)
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
        """Parse a FROM clause; optionally also parse query modifiers on it."""
        if not self._match(TokenType.FROM):
            return None

        comments = self._prev_comments
        this = self._parse_table()

        return self.expression(
            exp.From,
            comments=comments,
            this=self._parse_query_modifiers(this) if modifiers else this,
        )

    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parse a MATCH_RECOGNIZE(...) clause into exp.MatchRecognize."""
        if not
self._match(TokenType.MATCH_RECOGNIZE): 2043 return None 2044 2045 self._match_l_paren() 2046 2047 partition = self._parse_partition_by() 2048 order = self._parse_order() 2049 measures = ( 2050 self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None 2051 ) 2052 2053 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 2054 rows = exp.Var(this="ONE ROW PER MATCH") 2055 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 2056 text = "ALL ROWS PER MATCH" 2057 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 2058 text += f" SHOW EMPTY MATCHES" 2059 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 2060 text += f" OMIT EMPTY MATCHES" 2061 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 2062 text += f" WITH UNMATCHED ROWS" 2063 rows = exp.Var(this=text) 2064 else: 2065 rows = None 2066 2067 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 2068 text = "AFTER MATCH SKIP" 2069 if self._match_text_seq("PAST", "LAST", "ROW"): 2070 text += f" PAST LAST ROW" 2071 elif self._match_text_seq("TO", "NEXT", "ROW"): 2072 text += f" TO NEXT ROW" 2073 elif self._match_text_seq("TO", "FIRST"): 2074 text += f" TO FIRST {self._advance_any().text}" # type: ignore 2075 elif self._match_text_seq("TO", "LAST"): 2076 text += f" TO LAST {self._advance_any().text}" # type: ignore 2077 after = exp.Var(this=text) 2078 else: 2079 after = None 2080 2081 if self._match_text_seq("PATTERN"): 2082 self._match_l_paren() 2083 2084 if not self._curr: 2085 self.raise_error("Expecting )", self._curr) 2086 2087 paren = 1 2088 start = self._curr 2089 2090 while self._curr and paren > 0: 2091 if self._curr.token_type == TokenType.L_PAREN: 2092 paren += 1 2093 if self._curr.token_type == TokenType.R_PAREN: 2094 paren -= 1 2095 end = self._prev 2096 self._advance() 2097 if paren > 0: 2098 self.raise_error("Expecting )", self._curr) 2099 pattern = exp.Var(this=self._find_sql(start, end)) 2100 else: 2101 pattern = None 2102 2103 define = ( 2104 
self._parse_csv( 2105 lambda: self.expression( 2106 exp.Alias, 2107 alias=self._parse_id_var(any_token=True), 2108 this=self._match(TokenType.ALIAS) and self._parse_conjunction(), 2109 ) 2110 ) 2111 if self._match_text_seq("DEFINE") 2112 else None 2113 ) 2114 2115 self._match_r_paren() 2116 2117 return self.expression( 2118 exp.MatchRecognize, 2119 partition_by=partition, 2120 order=order, 2121 measures=measures, 2122 rows=rows, 2123 after=after, 2124 pattern=pattern, 2125 define=define, 2126 alias=self._parse_table_alias(), 2127 ) 2128 2129 def _parse_lateral(self) -> t.Optional[exp.Expression]: 2130 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY) 2131 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 2132 2133 if outer_apply or cross_apply: 2134 this = self._parse_select(table=True) 2135 view = None 2136 outer = not cross_apply 2137 elif self._match(TokenType.LATERAL): 2138 this = self._parse_select(table=True) 2139 view = self._match(TokenType.VIEW) 2140 outer = self._match(TokenType.OUTER) 2141 else: 2142 return None 2143 2144 if not this: 2145 this = self._parse_function() or self._parse_id_var(any_token=False) 2146 while self._match(TokenType.DOT): 2147 this = exp.Dot( 2148 this=this, 2149 expression=self._parse_function() or self._parse_id_var(any_token=False), 2150 ) 2151 2152 table_alias: t.Optional[exp.Expression] 2153 2154 if view: 2155 table = self._parse_id_var(any_token=False) 2156 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 2157 table_alias = self.expression(exp.TableAlias, this=table, columns=columns) 2158 else: 2159 table_alias = self._parse_table_alias() 2160 2161 expression = self.expression( 2162 exp.Lateral, 2163 this=this, 2164 view=view, 2165 outer=outer, 2166 alias=table_alias, 2167 ) 2168 2169 return expression 2170 2171 def _parse_join_side_and_kind( 2172 self, 2173 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 2174 return ( 2175 
self._match(TokenType.NATURAL) and self._prev, 2176 self._match_set(self.JOIN_SIDES) and self._prev, 2177 self._match_set(self.JOIN_KINDS) and self._prev, 2178 ) 2179 2180 def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]: 2181 if self._match(TokenType.COMMA): 2182 return self.expression(exp.Join, this=self._parse_table()) 2183 2184 index = self._index 2185 natural, side, kind = self._parse_join_side_and_kind() 2186 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 2187 join = self._match(TokenType.JOIN) 2188 2189 if not skip_join_token and not join: 2190 self._retreat(index) 2191 kind = None 2192 natural = None 2193 side = None 2194 2195 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 2196 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 2197 2198 if not skip_join_token and not join and not outer_apply and not cross_apply: 2199 return None 2200 2201 if outer_apply: 2202 side = Token(TokenType.LEFT, "LEFT") 2203 2204 kwargs: t.Dict[ 2205 str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]] 2206 ] = {"this": self._parse_table()} 2207 2208 if natural: 2209 kwargs["natural"] = True 2210 if side: 2211 kwargs["side"] = side.text 2212 if kind: 2213 kwargs["kind"] = kind.text 2214 if hint: 2215 kwargs["hint"] = hint 2216 2217 if self._match(TokenType.ON): 2218 kwargs["on"] = self._parse_conjunction() 2219 elif self._match(TokenType.USING): 2220 kwargs["using"] = self._parse_wrapped_id_vars() 2221 2222 return self.expression(exp.Join, **kwargs) # type: ignore 2223 2224 def _parse_index(self) -> exp.Expression: 2225 index = self._parse_id_var() 2226 self._match(TokenType.ON) 2227 self._match(TokenType.TABLE) # hive 2228 2229 return self.expression( 2230 exp.Index, 2231 this=index, 2232 table=self.expression(exp.Table, this=self._parse_id_var()), 2233 columns=self._parse_expression(), 2234 ) 2235 2236 def _parse_create_table_index(self) -> 
t.Optional[exp.Expression]: 2237 unique = self._match(TokenType.UNIQUE) 2238 primary = self._match_text_seq("PRIMARY") 2239 amp = self._match_text_seq("AMP") 2240 if not self._match(TokenType.INDEX): 2241 return None 2242 index = self._parse_id_var() 2243 columns = None 2244 if self._match(TokenType.L_PAREN, advance=False): 2245 columns = self._parse_wrapped_csv(self._parse_column) 2246 return self.expression( 2247 exp.Index, 2248 this=index, 2249 columns=columns, 2250 unique=unique, 2251 primary=primary, 2252 amp=amp, 2253 ) 2254 2255 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 2256 return ( 2257 (not schema and self._parse_function()) 2258 or self._parse_id_var(any_token=False) 2259 or self._parse_string_as_identifier() 2260 or self._parse_placeholder() 2261 ) 2262 2263 def _parse_table_parts(self, schema: bool = False) -> exp.Expression: 2264 catalog = None 2265 db = None 2266 table = self._parse_table_part(schema=schema) 2267 2268 while self._match(TokenType.DOT): 2269 if catalog: 2270 # This allows nesting the table in arbitrarily many dot expressions if needed 2271 table = self.expression( 2272 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 2273 ) 2274 else: 2275 catalog = db 2276 db = table 2277 table = self._parse_table_part(schema=schema) 2278 2279 if not table: 2280 self.raise_error(f"Expected table name but got {self._curr}") 2281 2282 return self.expression( 2283 exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots() 2284 ) 2285 2286 def _parse_table( 2287 self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None 2288 ) -> t.Optional[exp.Expression]: 2289 lateral = self._parse_lateral() 2290 if lateral: 2291 return lateral 2292 2293 unnest = self._parse_unnest() 2294 if unnest: 2295 return unnest 2296 2297 values = self._parse_derived_table_values() 2298 if values: 2299 return values 2300 2301 subquery = self._parse_select(table=True) 2302 if 
subquery: 2303 if not subquery.args.get("pivots"): 2304 subquery.set("pivots", self._parse_pivots()) 2305 return subquery 2306 2307 this = self._parse_table_parts(schema=schema) 2308 2309 if schema: 2310 return self._parse_schema(this=this) 2311 2312 if self.alias_post_tablesample: 2313 table_sample = self._parse_table_sample() 2314 2315 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 2316 if alias: 2317 this.set("alias", alias) 2318 2319 if not this.args.get("pivots"): 2320 this.set("pivots", self._parse_pivots()) 2321 2322 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 2323 this.set( 2324 "hints", 2325 self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)), 2326 ) 2327 self._match_r_paren() 2328 2329 if not self.alias_post_tablesample: 2330 table_sample = self._parse_table_sample() 2331 2332 if table_sample: 2333 table_sample.set("this", this) 2334 this = table_sample 2335 2336 return this 2337 2338 def _parse_unnest(self) -> t.Optional[exp.Expression]: 2339 if not self._match(TokenType.UNNEST): 2340 return None 2341 2342 expressions = self._parse_wrapped_csv(self._parse_type) 2343 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 2344 alias = self._parse_table_alias() 2345 2346 if alias and self.unnest_column_only: 2347 if alias.args.get("columns"): 2348 self.raise_error("Unexpected extra column alias in unnest.") 2349 alias.set("columns", [alias.this]) 2350 alias.set("this", None) 2351 2352 offset = None 2353 if self._match_pair(TokenType.WITH, TokenType.OFFSET): 2354 self._match(TokenType.ALIAS) 2355 offset = self._parse_id_var() or exp.Identifier(this="offset") 2356 2357 return self.expression( 2358 exp.Unnest, 2359 expressions=expressions, 2360 ordinality=ordinality, 2361 alias=alias, 2362 offset=offset, 2363 ) 2364 2365 def _parse_derived_table_values(self) -> t.Optional[exp.Expression]: 2366 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 2367 if 
not is_derived and not self._match(TokenType.VALUES): 2368 return None 2369 2370 expressions = self._parse_csv(self._parse_value) 2371 2372 if is_derived: 2373 self._match_r_paren() 2374 2375 return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias()) 2376 2377 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]: 2378 if not self._match(TokenType.TABLE_SAMPLE) and not ( 2379 as_modifier and self._match_text_seq("USING", "SAMPLE") 2380 ): 2381 return None 2382 2383 bucket_numerator = None 2384 bucket_denominator = None 2385 bucket_field = None 2386 percent = None 2387 rows = None 2388 size = None 2389 seed = None 2390 2391 kind = ( 2392 self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE" 2393 ) 2394 method = self._parse_var(tokens=(TokenType.ROW,)) 2395 2396 self._match(TokenType.L_PAREN) 2397 2398 num = self._parse_number() 2399 2400 if self._match(TokenType.BUCKET): 2401 bucket_numerator = self._parse_number() 2402 self._match(TokenType.OUT_OF) 2403 bucket_denominator = bucket_denominator = self._parse_number() 2404 self._match(TokenType.ON) 2405 bucket_field = self._parse_field() 2406 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 2407 percent = num 2408 elif self._match(TokenType.ROWS): 2409 rows = num 2410 else: 2411 size = num 2412 2413 self._match(TokenType.R_PAREN) 2414 2415 if self._match(TokenType.L_PAREN): 2416 method = self._parse_var() 2417 seed = self._match(TokenType.COMMA) and self._parse_number() 2418 self._match_r_paren() 2419 elif self._match_texts(("SEED", "REPEATABLE")): 2420 seed = self._parse_wrapped(self._parse_number) 2421 2422 return self.expression( 2423 exp.TableSample, 2424 method=method, 2425 bucket_numerator=bucket_numerator, 2426 bucket_denominator=bucket_denominator, 2427 bucket_field=bucket_field, 2428 percent=percent, 2429 rows=rows, 2430 size=size, 2431 seed=seed, 2432 kind=kind, 2433 ) 2434 2435 def 
_parse_pivots(self) -> t.List[t.Optional[exp.Expression]]: 2436 return list(iter(self._parse_pivot, None)) 2437 2438 def _parse_pivot(self) -> t.Optional[exp.Expression]: 2439 index = self._index 2440 2441 if self._match(TokenType.PIVOT): 2442 unpivot = False 2443 elif self._match(TokenType.UNPIVOT): 2444 unpivot = True 2445 else: 2446 return None 2447 2448 expressions = [] 2449 field = None 2450 2451 if not self._match(TokenType.L_PAREN): 2452 self._retreat(index) 2453 return None 2454 2455 if unpivot: 2456 expressions = self._parse_csv(self._parse_column) 2457 else: 2458 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 2459 2460 if not expressions: 2461 self.raise_error("Failed to parse PIVOT's aggregation list") 2462 2463 if not self._match(TokenType.FOR): 2464 self.raise_error("Expecting FOR") 2465 2466 value = self._parse_column() 2467 2468 if not self._match(TokenType.IN): 2469 self.raise_error("Expecting IN") 2470 2471 field = self._parse_in(value) 2472 2473 self._match_r_paren() 2474 2475 pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot) 2476 2477 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 2478 pivot.set("alias", self._parse_table_alias()) 2479 2480 if not unpivot: 2481 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 2482 2483 columns: t.List[exp.Expression] = [] 2484 for fld in pivot.args["field"].expressions: 2485 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 2486 for name in names: 2487 if self.PREFIXED_PIVOT_COLUMNS: 2488 name = f"{name}_{field_name}" if name else field_name 2489 else: 2490 name = f"{field_name}_{name}" if name else field_name 2491 2492 columns.append(exp.to_identifier(name)) 2493 2494 pivot.set("columns", columns) 2495 2496 return pivot 2497 2498 def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]: 2499 return [agg.alias for agg in 
    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WHERE clause; `skip_where_token` assumes WHERE was already consumed."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS / ROLLUP / CUBE / WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop: the grouping constructs can appear repeatedly, interleaved
        # with plain expressions; stop once no construct matched this pass.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE have no column list; plain ROLLUP/CUBE
            # are followed by a parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore

    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
        """Parse GROUPING SETS (...) into a list of grouping-set expressions."""
        if not self._match(TokenType.GROUPING_SETS):
            return None

        return self._parse_wrapped_csv(self._parse_grouping_set)

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a HAVING clause; `skip_having_token` assumes HAVING was consumed."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(exp.Having, this=self._parse_conjunction())

    def _parse_qualify(self) -> t.Optional[exp.Expression]:
        """Parse a QUALIFY clause (window-function filter)."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_conjunction())

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an ORDER BY clause; returns `this` unchanged when absent."""
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            return this

        return self.expression(
            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
        )

    def _parse_sort(
        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Generic parser for ORDER-BY-like clauses (SORT BY, CLUSTER BY, ...)."""
        if not self._match(token_type):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(self) -> exp.Expression:
        """Parse one ordering term: expression [ASC|DESC] [NULLS FIRST|LAST].

        When null ordering isn't explicit, `nulls_first` is derived from the
        dialect's `null_ordering` setting so output matches input semantics.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; just consume it
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)

    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP or ANSI FETCH FIRST/NEXT; returns `this` when absent."""
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive per the SQL standard.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET n [ROW|ROWS] (also the MySQL `LIMIT x, y` comma form)."""
        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
            return this

        count = self._parse_number()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        return self.expression(exp.Offset, this=this, expression=count)

    def _parse_lock(self) -> t.Optional[exp.Expression]:
        """Parse FOR UPDATE / FOR SHARE row-locking clauses."""
        if self._match_text_seq("FOR", "UPDATE"):
            return self.expression(exp.Lock, update=True)
        if self._match_text_seq("FOR", "SHARE"):
            return self.expression(exp.Lock, update=False)

        return None

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse UNION/EXCEPT/INTERSECT chains; recursion keeps left-association."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default unless ALL is given explicitly.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )

    def _parse_expression(self, explicit_alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, optionally requiring AS for aliases."""
        return self._parse_alias(self._parse_conjunction(), explicit=explicit_alias)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND/OR chains (lowest-precedence boolean operators)."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse =, <>, etc. equality operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse <, <=, >, >= comparison operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS, ISNULL, ...)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)  # e.g. NOT BETWEEN / NOT IN

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / TRUE / FALSE.

        Backtracks when IS is not followed by something it understands.
        """
        index = self._index - 1
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS NOT DISTINCT FROM == null-safe equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the right side of IN: UNNEST(...), (subquery), (list), or a field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A lone subquery becomes `query`; anything else is a value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this

    def _parse_between(self, this: exp.Expression) -> exp.Expression:
        """Parse `BETWEEN low AND high` (BETWEEN token already consumed)."""
        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()
        return self.expression(exp.Between, this=this, low=low, high=high)

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in an ESCAPE clause if one follows (used after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL literal into `exp.Interval(this, unit)`."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, including << and >> tokenized as LT LT / GT GT."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (+, -, ...)."""
        return self._parse_tokens(self._parse_factor, self.TERM)

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (*, /, %, ...)."""
        return self._parse_tokens(self._parse_unary, self.FACTOR)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary operators, then fall through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse INTERVAL literals, implicit casts (`INT '5'`), or a plain column.

        Tries a data type followed by a column; if both parse, the result is a
        Cast (or a dialect-specific literal parser). Backtracks otherwise.
        """
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # Bare type name with no args is ambiguous with an identifier;
                # give up and re-parse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this

    def _parse_type_size(self) -> t.Optional[exp.Expression]:
        """Parse a type size argument, e.g. the `10` in VARCHAR(10 CHAR)."""
        this = self._parse_type()
        if not this:
            return None

        return self.expression(
            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type into `exp.DataType` (or None, with backtracking).

        Handles parenthesized args, STRUCT/ARRAY/MAP nesting with <...>,
        `[]` array suffixes, timestamp time-zone variants, and INTERVAL units.
        `check_func` guards against reading a function name as a type: a type
        followed by a non-string paren argument is rejected.
        """
        index = self._index

        # Teradata prefixes types with SYSUDTLIB.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # TYPE(...) could also be a function call — decided below.
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # `INT[]`, `INT[][]`, ... — wrap in ARRAY once per bracket pair.
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone `[` means this wasn't a type after all (e.g. indexing).
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix means it can't be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # Not followed by a string literal — treat as a function name.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
        """Parse one STRUCT member: `name type` or `name: type` (ClickHouse/BQ)."""
        this = self._parse_type() or self._parse_id_var()
        self._match(TokenType.COLON)
        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in AT TIME ZONE if the clause follows."""
        if not self._match(TokenType.AT_TIME_ZONE):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted/bracketed/casted) column reference.

        Handles chained column operators: `::type` casts, dot access that
        shifts table/db/catalog qualifiers, and dialect-specific operators.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Another dot: shift qualifiers one level (this -> table -> db -> catalog).
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse primary expressions: literals, implicit string concatenation,
        `.5`-style numbers, and parenthesized expressions/subqueries/tuples."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate ('a' 'b' -> CONCAT).
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(lambda: self._parse_expression(explicit_alias=True))

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary expression, a function call, or an identifier."""
        return (
            self._parse_primary()
            or self._parse_function()
            or self._parse_id_var(any_token=any_token, tokens=tokens)
        )

    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call at the current position, or return None.

        Resolution order: no-paren special parsers, no-paren builtins,
        clause-specific parsers (FUNCTION_PARSERS), subquery predicates,
        then registered functions, finally falling back to exp.Anonymous.
        `anonymous` forces the Anonymous fallback (skip validation).
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        """Parse one parameter of a UDF signature (name plus optional type)."""
        return self._parse_column_def(self._parse_id_var())

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name and optional parameter list."""
        this = self._parse_id_var()

        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
        """Parse a charset introducer (e.g. _utf8'abc'); bare token -> Identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self.expression(exp.Identifier, this=token.text)

    def _parse_national(self, token: Token) -> exp.Expression:
        """Parse a national character literal (N'...')."""
        return self.expression(exp.National, this=exp.Literal.string(token.text))

    def _parse_session_parameter(self) -> exp.Expression:
        """Parse a session parameter reference, optionally namespaced (kind.name)."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda argument `(x, y) -> expr` / `x -> expr`, or fall back
        to DISTINCT / a plain select-or-expression argument.

        Backtracks when the tentative lambda header isn't followed by a
        lambda arrow token.
        """
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

        if isinstance(this, exp.EQ):
            left = this.this
            if isinstance(left, exp.Column):
                # Named arguments (name = value): the name is a Var, not a Column.
                left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema: column defs and/or table constraints.

        First probes for a nested SELECT (e.g. CREATE TABLE ... AS) — if one
        parses, this isn't a schema and `this` is returned untouched.
        """
        index = self._index

        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            # Probe only: any parse failure just means "not a SELECT here".
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: identifier, optional type, constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Bare identifier with neither type nor constraints: not a column def.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or
        START ... INCREMENT ... arguments (identity-style)."""
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()

    def _parse_compress(self) -> exp.Expression:
        """Parse a COMPRESS column constraint (single value or wrapped list)."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY
        with optional (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / CYCLE)."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this

    def _parse_inline(self) -> t.Optional[exp.Expression]:
        """Parse an INLINE [LENGTH] column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the tail of NOT: NULL or CASESPECIFIC (Teradata)."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, with optional CONSTRAINT <name> prefix."""
        if self._match(TokenType.CONSTRAINT):
            this = self._parse_id_var()
        else:
            this = None

        if self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones delegate to
        `_parse_unnamed_constraint` restricted to schema constraints."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        this = self._parse_id_var()
        expressions = []

        # A named constraint can carry several constraint bodies in a row.
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            expressions.append(constraint)

        return self.expression(exp.Constraint, this=this, expressions=expressions)

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a constraint keyword from `constraints` and dispatch to its parser."""
        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique(self) -> exp.Expression:
        """Parse UNIQUE — bare (column constraint) or with a column list (table)."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.UniqueColumnConstraint)
        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options (ON ... actions, DEFERRABLE, etc.)
        as plain strings, stopping at the first unrecognized token."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Expression]:
        """Parse REFERENCES table [(cols)] [options]; `match=False` assumes the
        REFERENCES keyword was already consumed."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_id_var()

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_id_vars()

        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] [ON DELETE/UPDATE action]."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE, RESTRICT) is the action.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
    def _parse_primary_key(self) -> exp.Expression:
        """Parse PRIMARY KEY — bare column constraint (with optional ASC/DESC)
        or table-level form with a wrapped field list and options."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)

        expressions = self._parse_wrapped_csv(self._parse_field)
        options = self._parse_key_constraint_options()
        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)

    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse `[...]` subscripts/arrays and `{...}` struct literals after `this`.

        Recurses so chained subscripts like x[0][1] are handled; index offset
        is normalized via `apply_index_offset` for 1-based-array dialects.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: a slice with no start, e.g. x[:3].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Turn `this` into a Slice if a colon follows (e.g. x[1:2])."""
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        ifs = []
        default = None

        # Optional operand for the "simple" CASE form (CASE x WHEN ...).
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(...) function form or IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                # Bare IF with no condition: backtrack and let callers retry.
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
this = self._parse_function() or self._parse_var() or self._parse_type() 3533 3534 if self._match(TokenType.FROM): 3535 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3536 3537 if not self._match(TokenType.COMMA): 3538 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 3539 3540 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 3541 3542 def _parse_cast(self, strict: bool) -> exp.Expression: 3543 this = self._parse_conjunction() 3544 3545 if not self._match(TokenType.ALIAS): 3546 if self._match(TokenType.COMMA): 3547 return self.expression( 3548 exp.CastToStrType, this=this, expression=self._parse_string() 3549 ) 3550 else: 3551 self.raise_error("Expected AS after CAST") 3552 3553 to = self._parse_types() 3554 3555 if not to: 3556 self.raise_error("Expected TYPE after CAST") 3557 elif to.this == exp.DataType.Type.CHAR: 3558 if self._match(TokenType.CHARACTER_SET): 3559 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 3560 3561 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3562 3563 def _parse_string_agg(self) -> exp.Expression: 3564 expression: t.Optional[exp.Expression] 3565 3566 if self._match(TokenType.DISTINCT): 3567 args = self._parse_csv(self._parse_conjunction) 3568 expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)]) 3569 else: 3570 args = self._parse_csv(self._parse_conjunction) 3571 expression = seq_get(args, 0) 3572 3573 index = self._index 3574 if not self._match(TokenType.R_PAREN): 3575 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 3576 order = self._parse_order(this=expression) 3577 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3578 3579 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 
3580 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 3581 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 3582 if not self._match(TokenType.WITHIN_GROUP): 3583 self._retreat(index) 3584 this = exp.GroupConcat.from_arg_list(args) 3585 self.validate_expression(this, args) 3586 return this 3587 3588 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 3589 order = self._parse_order(this=expression) 3590 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 3591 3592 def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]: 3593 to: t.Optional[exp.Expression] 3594 this = self._parse_bitwise() 3595 3596 if self._match(TokenType.USING): 3597 to = self.expression(exp.CharacterSet, this=self._parse_var()) 3598 elif self._match(TokenType.COMMA): 3599 to = self._parse_bitwise() 3600 else: 3601 to = None 3602 3603 # Swap the argument order if needed to produce the correct AST 3604 if self.CONVERT_TYPE_FIRST: 3605 this, to = to, this 3606 3607 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to) 3608 3609 def _parse_decode(self) -> t.Optional[exp.Expression]: 3610 """ 3611 There are generally two variants of the DECODE function: 3612 3613 - DECODE(bin, charset) 3614 - DECODE(expression, search, result [, search, result] ... [, default]) 3615 3616 The second variant will always be parsed into a CASE expression. Note that NULL 3617 needs special treatment, since we need to explicitly check for it with `IS NULL`, 3618 instead of relying on pattern matching. 
3619 """ 3620 args = self._parse_csv(self._parse_conjunction) 3621 3622 if len(args) < 3: 3623 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 3624 3625 expression, *expressions = args 3626 if not expression: 3627 return None 3628 3629 ifs = [] 3630 for search, result in zip(expressions[::2], expressions[1::2]): 3631 if not search or not result: 3632 return None 3633 3634 if isinstance(search, exp.Literal): 3635 ifs.append( 3636 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 3637 ) 3638 elif isinstance(search, exp.Null): 3639 ifs.append( 3640 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 3641 ) 3642 else: 3643 cond = exp.or_( 3644 exp.EQ(this=expression.copy(), expression=search), 3645 exp.and_( 3646 exp.Is(this=expression.copy(), expression=exp.Null()), 3647 exp.Is(this=search.copy(), expression=exp.Null()), 3648 copy=False, 3649 ), 3650 copy=False, 3651 ) 3652 ifs.append(exp.If(this=cond, true=result)) 3653 3654 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 3655 3656 def _parse_json_key_value(self) -> t.Optional[exp.Expression]: 3657 self._match_text_seq("KEY") 3658 key = self._parse_field() 3659 self._match(TokenType.COLON) 3660 self._match_text_seq("VALUE") 3661 value = self._parse_field() 3662 if not key and not value: 3663 return None 3664 return self.expression(exp.JSONKeyValue, this=key, expression=value) 3665 3666 def _parse_json_object(self) -> exp.Expression: 3667 expressions = self._parse_csv(self._parse_json_key_value) 3668 3669 null_handling = None 3670 if self._match_text_seq("NULL", "ON", "NULL"): 3671 null_handling = "NULL ON NULL" 3672 elif self._match_text_seq("ABSENT", "ON", "NULL"): 3673 null_handling = "ABSENT ON NULL" 3674 3675 unique_keys = None 3676 if self._match_text_seq("WITH", "UNIQUE"): 3677 unique_keys = True 3678 elif self._match_text_seq("WITHOUT", "UNIQUE"): 3679 unique_keys = False 3680 3681 
self._match_text_seq("KEYS") 3682 3683 return_type = self._match_text_seq("RETURNING") and self._parse_type() 3684 format_json = self._match_text_seq("FORMAT", "JSON") 3685 encoding = self._match_text_seq("ENCODING") and self._parse_var() 3686 3687 return self.expression( 3688 exp.JSONObject, 3689 expressions=expressions, 3690 null_handling=null_handling, 3691 unique_keys=unique_keys, 3692 return_type=return_type, 3693 format_json=format_json, 3694 encoding=encoding, 3695 ) 3696 3697 def _parse_logarithm(self) -> exp.Expression: 3698 # Default argument order is base, expression 3699 args = self._parse_csv(self._parse_range) 3700 3701 if len(args) > 1: 3702 if not self.LOG_BASE_FIRST: 3703 args.reverse() 3704 return exp.Log.from_arg_list(args) 3705 3706 return self.expression( 3707 exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0) 3708 ) 3709 3710 def _parse_match_against(self) -> exp.Expression: 3711 expressions = self._parse_csv(self._parse_column) 3712 3713 self._match_text_seq(")", "AGAINST", "(") 3714 3715 this = self._parse_string() 3716 3717 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 3718 modifier = "IN NATURAL LANGUAGE MODE" 3719 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3720 modifier = f"{modifier} WITH QUERY EXPANSION" 3721 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 3722 modifier = "IN BOOLEAN MODE" 3723 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 3724 modifier = "WITH QUERY EXPANSION" 3725 else: 3726 modifier = None 3727 3728 return self.expression( 3729 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 3730 ) 3731 3732 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 3733 def _parse_open_json(self) -> exp.Expression: 3734 this = self._parse_bitwise() 3735 path = self._match(TokenType.COMMA) and self._parse_string() 3736 3737 def _parse_open_json_column_def() -> exp.Expression: 3738 this = 
self._parse_field(any_token=True) 3739 kind = self._parse_types() 3740 path = self._parse_string() 3741 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 3742 return self.expression( 3743 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 3744 ) 3745 3746 expressions = None 3747 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 3748 self._match_l_paren() 3749 expressions = self._parse_csv(_parse_open_json_column_def) 3750 3751 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 3752 3753 def _parse_position(self, haystack_first: bool = False) -> exp.Expression: 3754 args = self._parse_csv(self._parse_bitwise) 3755 3756 if self._match(TokenType.IN): 3757 return self.expression( 3758 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 3759 ) 3760 3761 if haystack_first: 3762 haystack = seq_get(args, 0) 3763 needle = seq_get(args, 1) 3764 else: 3765 needle = seq_get(args, 0) 3766 haystack = seq_get(args, 1) 3767 3768 this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 3769 3770 self.validate_expression(this, args) 3771 3772 return this 3773 3774 def _parse_join_hint(self, func_name: str) -> exp.Expression: 3775 args = self._parse_csv(self._parse_table) 3776 return exp.JoinHint(this=func_name.upper(), expressions=args) 3777 3778 def _parse_substring(self) -> exp.Expression: 3779 # Postgres supports the form: substring(string [from int] [for int]) 3780 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 3781 3782 args = self._parse_csv(self._parse_bitwise) 3783 3784 if self._match(TokenType.FROM): 3785 args.append(self._parse_bitwise()) 3786 if self._match(TokenType.FOR): 3787 args.append(self._parse_bitwise()) 3788 3789 this = exp.Substring.from_arg_list(args) 3790 self.validate_expression(this, args) 3791 3792 return this 3793 3794 def _parse_trim(self) -> exp.Expression: 3795 # 
https://www.w3resource.com/sql/character-functions/trim.php 3796 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 3797 3798 position = None 3799 collation = None 3800 3801 if self._match_set(self.TRIM_TYPES): 3802 position = self._prev.text.upper() 3803 3804 expression = self._parse_bitwise() 3805 if self._match_set((TokenType.FROM, TokenType.COMMA)): 3806 this = self._parse_bitwise() 3807 else: 3808 this = expression 3809 expression = None 3810 3811 if self._match(TokenType.COLLATE): 3812 collation = self._parse_bitwise() 3813 3814 return self.expression( 3815 exp.Trim, 3816 this=this, 3817 position=position, 3818 expression=expression, 3819 collation=collation, 3820 ) 3821 3822 def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 3823 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 3824 3825 def _parse_named_window(self) -> t.Optional[exp.Expression]: 3826 return self._parse_window(self._parse_id_var(), alias=True) 3827 3828 def _parse_respect_or_ignore_nulls( 3829 self, this: t.Optional[exp.Expression] 3830 ) -> t.Optional[exp.Expression]: 3831 if self._match(TokenType.IGNORE_NULLS): 3832 return self.expression(exp.IgnoreNulls, this=this) 3833 if self._match(TokenType.RESPECT_NULLS): 3834 return self.expression(exp.RespectNulls, this=this) 3835 return this 3836 3837 def _parse_window( 3838 self, this: t.Optional[exp.Expression], alias: bool = False 3839 ) -> t.Optional[exp.Expression]: 3840 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 3841 this = self.expression(exp.Filter, this=this, expression=self._parse_where()) 3842 self._match_r_paren() 3843 3844 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
3845 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 3846 if self._match(TokenType.WITHIN_GROUP): 3847 order = self._parse_wrapped(self._parse_order) 3848 this = self.expression(exp.WithinGroup, this=this, expression=order) 3849 3850 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 3851 # Some dialects choose to implement and some do not. 3852 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 3853 3854 # There is some code above in _parse_lambda that handles 3855 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 3856 3857 # The below changes handle 3858 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 3859 3860 # Oracle allows both formats 3861 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 3862 # and Snowflake chose to do the same for familiarity 3863 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 3864 this = self._parse_respect_or_ignore_nulls(this) 3865 3866 # bigquery select from window x AS (partition by ...) 
3867 if alias: 3868 over = None 3869 self._match(TokenType.ALIAS) 3870 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 3871 return this 3872 else: 3873 over = self._prev.text.upper() 3874 3875 if not self._match(TokenType.L_PAREN): 3876 return self.expression( 3877 exp.Window, this=this, alias=self._parse_id_var(False), over=over 3878 ) 3879 3880 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 3881 3882 first = self._match(TokenType.FIRST) 3883 if self._match_text_seq("LAST"): 3884 first = False 3885 3886 partition = self._parse_partition_by() 3887 order = self._parse_order() 3888 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 3889 3890 if kind: 3891 self._match(TokenType.BETWEEN) 3892 start = self._parse_window_spec() 3893 self._match(TokenType.AND) 3894 end = self._parse_window_spec() 3895 3896 spec = self.expression( 3897 exp.WindowSpec, 3898 kind=kind, 3899 start=start["value"], 3900 start_side=start["side"], 3901 end=end["value"], 3902 end_side=end["side"], 3903 ) 3904 else: 3905 spec = None 3906 3907 self._match_r_paren() 3908 3909 return self.expression( 3910 exp.Window, 3911 this=this, 3912 partition_by=partition, 3913 order=order, 3914 spec=spec, 3915 alias=window_alias, 3916 over=over, 3917 first=first, 3918 ) 3919 3920 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 3921 self._match(TokenType.BETWEEN) 3922 3923 return { 3924 "value": ( 3925 self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text 3926 ) 3927 or self._parse_bitwise(), 3928 "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text, 3929 } 3930 3931 def _parse_alias( 3932 self, this: t.Optional[exp.Expression], explicit: bool = False 3933 ) -> t.Optional[exp.Expression]: 3934 any_token = self._match(TokenType.ALIAS) 3935 3936 if explicit and not any_token: 3937 return this 3938 3939 if self._match(TokenType.L_PAREN): 3940 aliases 
= self.expression( 3941 exp.Aliases, 3942 this=this, 3943 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 3944 ) 3945 self._match_r_paren(aliases) 3946 return aliases 3947 3948 alias = self._parse_id_var(any_token) 3949 3950 if alias: 3951 return self.expression(exp.Alias, this=this, alias=alias) 3952 3953 return this 3954 3955 def _parse_id_var( 3956 self, 3957 any_token: bool = True, 3958 tokens: t.Optional[t.Collection[TokenType]] = None, 3959 prefix_tokens: t.Optional[t.Collection[TokenType]] = None, 3960 ) -> t.Optional[exp.Expression]: 3961 identifier = self._parse_identifier() 3962 3963 if identifier: 3964 return identifier 3965 3966 prefix = "" 3967 3968 if prefix_tokens: 3969 while self._match_set(prefix_tokens): 3970 prefix += self._prev.text 3971 3972 if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS): 3973 quoted = self._prev.token_type == TokenType.STRING 3974 return exp.Identifier(this=prefix + self._prev.text, quoted=quoted) 3975 3976 return None 3977 3978 def _parse_string(self) -> t.Optional[exp.Expression]: 3979 if self._match(TokenType.STRING): 3980 return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev) 3981 return self._parse_placeholder() 3982 3983 def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]: 3984 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 3985 3986 def _parse_number(self) -> t.Optional[exp.Expression]: 3987 if self._match(TokenType.NUMBER): 3988 return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev) 3989 return self._parse_placeholder() 3990 3991 def _parse_identifier(self) -> t.Optional[exp.Expression]: 3992 if self._match(TokenType.IDENTIFIER): 3993 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 3994 return self._parse_placeholder() 3995 3996 def _parse_var( 3997 self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None 3998 ) -> 
t.Optional[exp.Expression]: 3999 if ( 4000 (any_token and self._advance_any()) 4001 or self._match(TokenType.VAR) 4002 or (self._match_set(tokens) if tokens else False) 4003 ): 4004 return self.expression(exp.Var, this=self._prev.text) 4005 return self._parse_placeholder() 4006 4007 def _advance_any(self) -> t.Optional[Token]: 4008 if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS: 4009 self._advance() 4010 return self._prev 4011 return None 4012 4013 def _parse_var_or_string(self) -> t.Optional[exp.Expression]: 4014 return self._parse_var() or self._parse_string() 4015 4016 def _parse_null(self) -> t.Optional[exp.Expression]: 4017 if self._match(TokenType.NULL): 4018 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 4019 return None 4020 4021 def _parse_boolean(self) -> t.Optional[exp.Expression]: 4022 if self._match(TokenType.TRUE): 4023 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 4024 if self._match(TokenType.FALSE): 4025 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 4026 return None 4027 4028 def _parse_star(self) -> t.Optional[exp.Expression]: 4029 if self._match(TokenType.STAR): 4030 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 4031 return None 4032 4033 def _parse_parameter(self) -> exp.Expression: 4034 wrapped = self._match(TokenType.L_BRACE) 4035 this = self._parse_var() or self._parse_identifier() or self._parse_primary() 4036 self._match(TokenType.R_BRACE) 4037 return self.expression(exp.Parameter, this=this, wrapped=wrapped) 4038 4039 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 4040 if self._match_set(self.PLACEHOLDER_PARSERS): 4041 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 4042 if placeholder: 4043 return placeholder 4044 self._advance(-1) 4045 return None 4046 4047 def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4048 if not self._match(TokenType.EXCEPT): 4049 return None 4050 if 
self._match(TokenType.L_PAREN, advance=False): 4051 return self._parse_wrapped_csv(self._parse_column) 4052 return self._parse_csv(self._parse_column) 4053 4054 def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]: 4055 if not self._match(TokenType.REPLACE): 4056 return None 4057 if self._match(TokenType.L_PAREN, advance=False): 4058 return self._parse_wrapped_csv(self._parse_expression) 4059 return self._parse_csv(self._parse_expression) 4060 4061 def _parse_csv( 4062 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 4063 ) -> t.List[t.Optional[exp.Expression]]: 4064 parse_result = parse_method() 4065 items = [parse_result] if parse_result is not None else [] 4066 4067 while self._match(sep): 4068 self._add_comments(parse_result) 4069 parse_result = parse_method() 4070 if parse_result is not None: 4071 items.append(parse_result) 4072 4073 return items 4074 4075 def _parse_tokens( 4076 self, parse_method: t.Callable, expressions: t.Dict 4077 ) -> t.Optional[exp.Expression]: 4078 this = parse_method() 4079 4080 while self._match_set(expressions): 4081 this = self.expression( 4082 expressions[self._prev.token_type], 4083 this=this, 4084 comments=self._prev_comments, 4085 expression=parse_method(), 4086 ) 4087 4088 return this 4089 4090 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]: 4091 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 4092 4093 def _parse_wrapped_csv( 4094 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 4095 ) -> t.List[t.Optional[exp.Expression]]: 4096 return self._parse_wrapped( 4097 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 4098 ) 4099 4100 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 4101 wrapped = self._match(TokenType.L_PAREN) 4102 if not wrapped and not optional: 4103 self.raise_error("Expecting (") 4104 parse_result = parse_method() 
4105 if wrapped: 4106 self._match_r_paren() 4107 return parse_result 4108 4109 def _parse_select_or_expression(self) -> t.Optional[exp.Expression]: 4110 return self._parse_select() or self._parse_set_operations(self._parse_expression()) 4111 4112 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 4113 return self._parse_set_operations( 4114 self._parse_select(nested=True, parse_subquery_alias=False) 4115 ) 4116 4117 def _parse_transaction(self) -> exp.Expression: 4118 this = None 4119 if self._match_texts(self.TRANSACTION_KIND): 4120 this = self._prev.text 4121 4122 self._match_texts({"TRANSACTION", "WORK"}) 4123 4124 modes = [] 4125 while True: 4126 mode = [] 4127 while self._match(TokenType.VAR): 4128 mode.append(self._prev.text) 4129 4130 if mode: 4131 modes.append(" ".join(mode)) 4132 if not self._match(TokenType.COMMA): 4133 break 4134 4135 return self.expression(exp.Transaction, this=this, modes=modes) 4136 4137 def _parse_commit_or_rollback(self) -> exp.Expression: 4138 chain = None 4139 savepoint = None 4140 is_rollback = self._prev.token_type == TokenType.ROLLBACK 4141 4142 self._match_texts({"TRANSACTION", "WORK"}) 4143 4144 if self._match_text_seq("TO"): 4145 self._match_text_seq("SAVEPOINT") 4146 savepoint = self._parse_id_var() 4147 4148 if self._match(TokenType.AND): 4149 chain = not self._match_text_seq("NO") 4150 self._match_text_seq("CHAIN") 4151 4152 if is_rollback: 4153 return self.expression(exp.Rollback, savepoint=savepoint) 4154 return self.expression(exp.Commit, chain=chain) 4155 4156 def _parse_add_column(self) -> t.Optional[exp.Expression]: 4157 if not self._match_text_seq("ADD"): 4158 return None 4159 4160 self._match(TokenType.COLUMN) 4161 exists_column = self._parse_exists(not_=True) 4162 expression = self._parse_column_def(self._parse_field(any_token=True)) 4163 4164 if expression: 4165 expression.set("exists", exists_column) 4166 4167 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 
4168 if self._match_texts(("FIRST", "AFTER")): 4169 position = self._prev.text 4170 column_position = self.expression( 4171 exp.ColumnPosition, this=self._parse_column(), position=position 4172 ) 4173 expression.set("position", column_position) 4174 4175 return expression 4176 4177 def _parse_drop_column(self) -> t.Optional[exp.Expression]: 4178 drop = self._match(TokenType.DROP) and self._parse_drop() 4179 if drop and not isinstance(drop, exp.Command): 4180 drop.set("kind", drop.args.get("kind", "COLUMN")) 4181 return drop 4182 4183 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 4184 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression: 4185 return self.expression( 4186 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 4187 ) 4188 4189 def _parse_add_constraint(self) -> t.Optional[exp.Expression]: 4190 this = None 4191 kind = self._prev.token_type 4192 4193 if kind == TokenType.CONSTRAINT: 4194 this = self._parse_id_var() 4195 4196 if self._match_text_seq("CHECK"): 4197 expression = self._parse_wrapped(self._parse_conjunction) 4198 enforced = self._match_text_seq("ENFORCED") 4199 4200 return self.expression( 4201 exp.AddConstraint, this=this, expression=expression, enforced=enforced 4202 ) 4203 4204 if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY): 4205 expression = self._parse_foreign_key() 4206 elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY): 4207 expression = self._parse_primary_key() 4208 else: 4209 expression = None 4210 4211 return self.expression(exp.AddConstraint, this=this, expression=expression) 4212 4213 def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]: 4214 index = self._index - 1 4215 4216 if self._match_set(self.ADD_CONSTRAINT_TOKENS): 4217 return self._parse_csv(self._parse_add_constraint) 4218 4219 self._retreat(index) 4220 return self._parse_csv(self._parse_add_column) 4221 4222 def 
_parse_alter_table_alter(self) -> exp.Expression: 4223 self._match(TokenType.COLUMN) 4224 column = self._parse_field(any_token=True) 4225 4226 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 4227 return self.expression(exp.AlterColumn, this=column, drop=True) 4228 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 4229 return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction()) 4230 4231 self._match_text_seq("SET", "DATA") 4232 return self.expression( 4233 exp.AlterColumn, 4234 this=column, 4235 dtype=self._match_text_seq("TYPE") and self._parse_types(), 4236 collate=self._match(TokenType.COLLATE) and self._parse_term(), 4237 using=self._match(TokenType.USING) and self._parse_conjunction(), 4238 ) 4239 4240 def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]: 4241 index = self._index - 1 4242 4243 partition_exists = self._parse_exists() 4244 if self._match(TokenType.PARTITION, advance=False): 4245 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 4246 4247 self._retreat(index) 4248 return self._parse_csv(self._parse_drop_column) 4249 4250 def _parse_alter_table_rename(self) -> exp.Expression: 4251 self._match_text_seq("TO") 4252 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 4253 4254 def _parse_alter(self) -> t.Optional[exp.Expression]: 4255 start = self._prev 4256 4257 if not self._match(TokenType.TABLE): 4258 return self._parse_as_command(start) 4259 4260 exists = self._parse_exists() 4261 this = self._parse_table(schema=True) 4262 4263 if self._next: 4264 self._advance() 4265 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 4266 4267 if parser: 4268 actions = ensure_list(parser(self)) 4269 4270 if not self._curr: 4271 return self.expression( 4272 exp.AlterTable, 4273 this=this, 4274 exists=exists, 4275 actions=actions, 4276 ) 4277 return self._parse_as_command(start) 4278 4279 def _parse_merge(self) -> 
exp.Expression: 4280 self._match(TokenType.INTO) 4281 target = self._parse_table() 4282 4283 self._match(TokenType.USING) 4284 using = self._parse_table() 4285 4286 self._match(TokenType.ON) 4287 on = self._parse_conjunction() 4288 4289 whens = [] 4290 while self._match(TokenType.WHEN): 4291 matched = not self._match(TokenType.NOT) 4292 self._match_text_seq("MATCHED") 4293 source = ( 4294 False 4295 if self._match_text_seq("BY", "TARGET") 4296 else self._match_text_seq("BY", "SOURCE") 4297 ) 4298 condition = self._parse_conjunction() if self._match(TokenType.AND) else None 4299 4300 self._match(TokenType.THEN) 4301 4302 if self._match(TokenType.INSERT): 4303 _this = self._parse_star() 4304 if _this: 4305 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this) 4306 else: 4307 then = self.expression( 4308 exp.Insert, 4309 this=self._parse_value(), 4310 expression=self._match(TokenType.VALUES) and self._parse_value(), 4311 ) 4312 elif self._match(TokenType.UPDATE): 4313 expressions = self._parse_star() 4314 if expressions: 4315 then = self.expression(exp.Update, expressions=expressions) 4316 else: 4317 then = self.expression( 4318 exp.Update, 4319 expressions=self._match(TokenType.SET) 4320 and self._parse_csv(self._parse_equality), 4321 ) 4322 elif self._match(TokenType.DELETE): 4323 then = self.expression(exp.Var, this=self._prev.text) 4324 else: 4325 then = None 4326 4327 whens.append( 4328 self.expression( 4329 exp.When, 4330 matched=matched, 4331 source=source, 4332 condition=condition, 4333 then=then, 4334 ) 4335 ) 4336 4337 return self.expression( 4338 exp.Merge, 4339 this=target, 4340 using=using, 4341 on=on, 4342 expressions=whens, 4343 ) 4344 4345 def _parse_show(self) -> t.Optional[exp.Expression]: 4346 parser = self._find_parser(self.SHOW_PARSERS, self._show_trie) # type: ignore 4347 if parser: 4348 return parser(self) 4349 self._advance() 4350 return self.expression(exp.Show, this=self._prev.text.upper()) 4351 4352 def 
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `SET <name> = <value>` (or `... TO <value>`) item into a SetItem.

        Returns None (with the token cursor restored) when no '='/'TO' follows the
        left-hand side, so the caller can try other SET forms.
        """
        index = self._index

        # e.g. SET GLOBAL TRANSACTION ... / SET SESSION TRANSACTION ...
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            # Not an assignment after all; rewind so other parsers can retry.
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `TRANSACTION <characteristic>, ...` into a TRANSACTION SetItem."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, hence the dict-splat spelling.
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single item of a SET statement."""
        # Prefer a registered SET parser (multi-word keys resolved via the trie);
        # otherwise fall back to a plain assignment.
        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement; falls back to a raw Command if tokens remain."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        # Leftover tokens mean the items did not parse fully: rewind and wrap
        # the whole statement as an opaque Command instead.
        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
        """Match one of `options` (each possibly multi-word) and wrap it in a Var."""
        for option in options:
            if self._match_text_seq(*option.split(" ")):
                return exp.Var(this=option)
        return None

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and return them as an opaque Command.

        `this` holds the leading keyword (the text of `start`), `expression` the rest.
        """
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        return exp.Command(this=text[:size], expression=text[size:])

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming token text to locate a matching parser.

        Restores the token cursor and returns None when no complete key matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end in the trie: no parser for this word sequence.
                break
            if result == 2:
                # Full key matched: look up the parser by the joined words.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None) -> t.Optional[bool]:
        """Return True (advancing by default) if the current token has `token_type`."""
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            # Transfer any pending comments onto `expression`, when given.
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True) -> t.Optional[bool]:
        """Return True (advancing by default) if the current token type is in `types`."""
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True) -> t.Optional[bool]:
        """Return True if the next two tokens match the given types, in order."""
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression=None) -> None:
        """Require a '(' token; error otherwise."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression=None) -> None:
        """Require a ')' token; error otherwise."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True) -> bool:
        """Return True if the current token's upper-cased text is in `texts`."""
        if self._curr and self._curr.text.upper() in texts:
            if advance:
                self._advance()
            return True
        return False

    def _match_text_seq(self, *texts, advance=True) -> bool:
        """Return True if the upcoming tokens spell out `texts` in sequence.

        On a partial mismatch the cursor is restored; with advance=False the
        cursor is restored even on success (pure lookahead).
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True

    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains."""
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A qualified column becomes table.Dot(column); a bare one becomes a Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this

    def _replace_lambda(self, node, lambda_variables):
        """Replace Column references to names in `lambda_variables` within `node`.

        Each matching column is swapped for a Dot chain (when table-qualified) or
        its bare identifier; returns the possibly-replaced root node.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        # Replace the outermost Dot of the chain.
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # No enclosing Dot: replace the column itself (or the root).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces a parsed syntax tree.
Arguments:
- error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
- alias_post_tablesample: If the table alias comes after tablesample. Default: False
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
809 def __init__( 810 self, 811 error_level: t.Optional[ErrorLevel] = None, 812 error_message_context: int = 100, 813 index_offset: int = 0, 814 unnest_column_only: bool = False, 815 alias_post_tablesample: bool = False, 816 max_errors: int = 3, 817 null_ordering: t.Optional[str] = None, 818 ): 819 self.error_level = error_level or ErrorLevel.IMMEDIATE 820 self.error_message_context = error_message_context 821 self.index_offset = index_offset 822 self.unnest_column_only = unnest_column_only 823 self.alias_post_tablesample = alias_post_tablesample 824 self.max_errors = max_errors 825 self.null_ordering = null_ordering 826 self.reset()
838 def parse( 839 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 840 ) -> t.List[t.Optional[exp.Expression]]: 841 """ 842 Parses a list of tokens and returns a list of syntax trees, one tree 843 per parsed SQL statement. 844 845 Args: 846 raw_tokens: the list of tokens. 847 sql: the original SQL string, used to produce helpful debug messages. 848 849 Returns: 850 The list of syntax trees. 851 """ 852 return self._parse( 853 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 854 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees.
856 def parse_into( 857 self, 858 expression_types: exp.IntoType, 859 raw_tokens: t.List[Token], 860 sql: t.Optional[str] = None, 861 ) -> t.List[t.Optional[exp.Expression]]: 862 """ 863 Parses a list of tokens into a given Expression type. If a collection of Expression 864 types is given instead, this method will try to parse the token list into each one 865 of them, stopping at the first for which the parsing succeeds. 866 867 Args: 868 expression_types: the expression type(s) to try and parse the token list into. 869 raw_tokens: the list of tokens. 870 sql: the original SQL string, used to produce helpful debug messages. 871 872 Returns: 873 The target Expression. 874 """ 875 errors = [] 876 for expression_type in ensure_collection(expression_types): 877 parser = self.EXPRESSION_PARSERS.get(expression_type) 878 if not parser: 879 raise TypeError(f"No parser registered for {expression_type}") 880 try: 881 return self._parse(parser, raw_tokens, sql) 882 except ParseError as e: 883 e.errors[0]["into_expression"] = expression_type 884 errors.append(e) 885 raise ParseError( 886 f"Failed to parse into {expression_types}", 887 errors=merge_errors(errors), 888 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: the expression type(s) to try and parse the token list into.
- raw_tokens: the list of tokens.
- sql: the original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
924 def check_errors(self) -> None: 925 """ 926 Logs or raises any found errors, depending on the chosen error level setting. 927 """ 928 if self.error_level == ErrorLevel.WARN: 929 for error in self.errors: 930 logger.error(str(error)) 931 elif self.error_level == ErrorLevel.RAISE and self.errors: 932 raise ParseError( 933 concat_messages(self.errors, self.max_errors), 934 errors=merge_errors(self.errors), 935 )
Logs or raises any found errors, depending on the chosen error level setting.
937 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 938 """ 939 Appends an error in the list of recorded errors or raises it, depending on the chosen 940 error level setting. 941 """ 942 token = token or self._curr or self._prev or Token.string("") 943 start = token.start 944 end = token.end 945 start_context = self.sql[max(start - self.error_message_context, 0) : start] 946 highlight = self.sql[start:end] 947 end_context = self.sql[end : end + self.error_message_context] 948 949 error = ParseError.new( 950 f"{message}. Line {token.line}, Col: {token.col}.\n" 951 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 952 description=message, 953 line=token.line, 954 col=token.col, 955 start_context=start_context, 956 highlight=highlight, 957 end_context=end_context, 958 ) 959 960 if self.error_level == ErrorLevel.IMMEDIATE: 961 raise error 962 963 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
965 def expression( 966 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 967 ) -> E: 968 """ 969 Creates a new, validated Expression. 970 971 Args: 972 exp_class: the expression class to instantiate. 973 comments: an optional list of comments to attach to the expression. 974 kwargs: the arguments to set for the expression along with their respective values. 975 976 Returns: 977 The target expression. 978 """ 979 instance = exp_class(**kwargs) 980 instance.add_comments(comments) if comments else self._add_comments(instance) 981 self.validate_expression(instance) 982 return instance
Creates a new, validated Expression.
Arguments:
- exp_class: the expression class to instantiate.
- comments: an optional list of comments to attach to the expression.
- kwargs: the arguments to set for the expression along with their respective values.
Returns:
The target expression.
989 def validate_expression( 990 self, expression: exp.Expression, args: t.Optional[t.List] = None 991 ) -> None: 992 """ 993 Validates an already instantiated expression, making sure that all its mandatory arguments 994 are set. 995 996 Args: 997 expression: the expression to validate. 998 args: an optional list of items that was used to instantiate the expression, if it's a Func. 999 """ 1000 if self.error_level == ErrorLevel.IGNORE: 1001 return 1002 1003 for error_message in expression.error_messages(args): 1004 self.raise_error(error_message)
Validates an already instantiated expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: the expression to validate.
- args: an optional list of items that was used to instantiate the expression, if it's a Func.