# Source listing of module ``sqlglot.parser`` (captured from a rendered docs page;
# the "Edit on GitHub" navigation link and page title belong to that rendering).

   1from __future__ import annotations
   2
   3import logging
   4import typing as t
   5from collections import defaultdict
   6
   7from sqlglot import exp
   8from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
   9from sqlglot.helper import apply_index_offset, ensure_collection, ensure_list, seq_get
  10from sqlglot.tokens import Token, Tokenizer, TokenType
  11from sqlglot.trie import in_trie, new_trie
  12
# Module-level logger shared by the parser; the name matches the package so
# applications can configure it via logging.getLogger("sqlglot").
logger = logging.getLogger("sqlglot")

# Generic type variable bound to exp.Expression, used in helper signatures
# that accept and return the same expression subclass.
E = t.TypeVar("E", bound=exp.Expression)
  16
  17
  18def parse_var_map(args: t.Sequence) -> exp.Expression:
  19    if len(args) == 1 and args[0].is_star:
  20        return exp.StarMap(this=args[0])
  21
  22    keys = []
  23    values = []
  24    for i in range(0, len(args), 2):
  25        keys.append(args[i])
  26        values.append(args[i + 1])
  27    return exp.VarMap(
  28        keys=exp.Array(expressions=keys),
  29        values=exp.Array(expressions=values),
  30    )
  31
  32
  33def parse_like(args):
  34    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
  35    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
  36
  37
  38def binary_range_parser(
  39    expr_type: t.Type[exp.Expression],
  40) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
  41    return lambda self, this: self._parse_escape(
  42        self.expression(expr_type, this=this, expression=self._parse_bitwise())
  43    )
  44
  45
  46class _Parser(type):
  47    def __new__(cls, clsname, bases, attrs):
  48        klass = super().__new__(cls, clsname, bases, attrs)
  49        klass._show_trie = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
  50        klass._set_trie = new_trie(key.split(" ") for key in klass.SET_PARSERS)
  51
  52        return klass
  53
  54
  55class Parser(metaclass=_Parser):
  56    """
  57    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  58    a parsed syntax tree.
  59
  60    Args:
  61        error_level: the desired error level.
  62            Default: ErrorLevel.RAISE
  63        error_message_context: determines the amount of context to capture from a
  64            query string when displaying the error message (in number of characters).
  65            Default: 50.
  66        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  67            Default: 0
  68        alias_post_tablesample: If the table alias comes after tablesample.
  69            Default: False
  70        max_errors: Maximum number of error messages to include in a raised ParseError.
  71            This is only relevant if error_level is ErrorLevel.RAISE.
  72            Default: 3
  73        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  74            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  75            Default: "nulls_are_small"
  76    """
  77
  78    FUNCTIONS: t.Dict[str, t.Callable] = {
  79        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  80        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  81            this=seq_get(args, 0),
  82            to=exp.DataType(this=exp.DataType.Type.TEXT),
  83        ),
  84        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  85        "IFNULL": exp.Coalesce.from_arg_list,
  86        "LIKE": parse_like,
  87        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  88            this=seq_get(args, 0),
  89            to=exp.DataType(this=exp.DataType.Type.TEXT),
  90        ),
  91        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  92            this=exp.Cast(
  93                this=seq_get(args, 0),
  94                to=exp.DataType(this=exp.DataType.Type.TEXT),
  95            ),
  96            start=exp.Literal.number(1),
  97            length=exp.Literal.number(10),
  98        ),
  99        "VAR_MAP": parse_var_map,
 100    }
 101
 102    NO_PAREN_FUNCTIONS = {
 103        TokenType.CURRENT_DATE: exp.CurrentDate,
 104        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 105        TokenType.CURRENT_TIME: exp.CurrentTime,
 106        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 107        TokenType.CURRENT_USER: exp.CurrentUser,
 108    }
 109
 110    JOIN_HINTS: t.Set[str] = set()
 111
 112    NESTED_TYPE_TOKENS = {
 113        TokenType.ARRAY,
 114        TokenType.MAP,
 115        TokenType.NULLABLE,
 116        TokenType.STRUCT,
 117    }
 118
 119    TYPE_TOKENS = {
 120        TokenType.BIT,
 121        TokenType.BOOLEAN,
 122        TokenType.TINYINT,
 123        TokenType.UTINYINT,
 124        TokenType.SMALLINT,
 125        TokenType.USMALLINT,
 126        TokenType.INT,
 127        TokenType.UINT,
 128        TokenType.BIGINT,
 129        TokenType.UBIGINT,
 130        TokenType.INT128,
 131        TokenType.UINT128,
 132        TokenType.INT256,
 133        TokenType.UINT256,
 134        TokenType.FLOAT,
 135        TokenType.DOUBLE,
 136        TokenType.CHAR,
 137        TokenType.NCHAR,
 138        TokenType.VARCHAR,
 139        TokenType.NVARCHAR,
 140        TokenType.TEXT,
 141        TokenType.MEDIUMTEXT,
 142        TokenType.LONGTEXT,
 143        TokenType.MEDIUMBLOB,
 144        TokenType.LONGBLOB,
 145        TokenType.BINARY,
 146        TokenType.VARBINARY,
 147        TokenType.JSON,
 148        TokenType.JSONB,
 149        TokenType.INTERVAL,
 150        TokenType.TIME,
 151        TokenType.TIMESTAMP,
 152        TokenType.TIMESTAMPTZ,
 153        TokenType.TIMESTAMPLTZ,
 154        TokenType.DATETIME,
 155        TokenType.DATETIME64,
 156        TokenType.DATE,
 157        TokenType.DECIMAL,
 158        TokenType.BIGDECIMAL,
 159        TokenType.UUID,
 160        TokenType.GEOGRAPHY,
 161        TokenType.GEOMETRY,
 162        TokenType.HLLSKETCH,
 163        TokenType.HSTORE,
 164        TokenType.PSEUDO_TYPE,
 165        TokenType.SUPER,
 166        TokenType.SERIAL,
 167        TokenType.SMALLSERIAL,
 168        TokenType.BIGSERIAL,
 169        TokenType.XML,
 170        TokenType.UNIQUEIDENTIFIER,
 171        TokenType.MONEY,
 172        TokenType.SMALLMONEY,
 173        TokenType.ROWVERSION,
 174        TokenType.IMAGE,
 175        TokenType.VARIANT,
 176        TokenType.OBJECT,
 177        TokenType.INET,
 178        *NESTED_TYPE_TOKENS,
 179    }
 180
 181    SUBQUERY_PREDICATES = {
 182        TokenType.ANY: exp.Any,
 183        TokenType.ALL: exp.All,
 184        TokenType.EXISTS: exp.Exists,
 185        TokenType.SOME: exp.Any,
 186    }
 187
 188    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 189
 190    DB_CREATABLES = {
 191        TokenType.DATABASE,
 192        TokenType.SCHEMA,
 193        TokenType.TABLE,
 194        TokenType.VIEW,
 195    }
 196
 197    CREATABLES = {
 198        TokenType.COLUMN,
 199        TokenType.FUNCTION,
 200        TokenType.INDEX,
 201        TokenType.PROCEDURE,
 202        *DB_CREATABLES,
 203    }
 204
 205    ID_VAR_TOKENS = {
 206        TokenType.VAR,
 207        TokenType.ANTI,
 208        TokenType.APPLY,
 209        TokenType.AUTO_INCREMENT,
 210        TokenType.BEGIN,
 211        TokenType.BOTH,
 212        TokenType.BUCKET,
 213        TokenType.CACHE,
 214        TokenType.CASCADE,
 215        TokenType.COLLATE,
 216        TokenType.COMMAND,
 217        TokenType.COMMENT,
 218        TokenType.COMMIT,
 219        TokenType.COMPOUND,
 220        TokenType.CONSTRAINT,
 221        TokenType.DEFAULT,
 222        TokenType.DELETE,
 223        TokenType.DESCRIBE,
 224        TokenType.DIV,
 225        TokenType.END,
 226        TokenType.EXECUTE,
 227        TokenType.ESCAPE,
 228        TokenType.FALSE,
 229        TokenType.FIRST,
 230        TokenType.FILTER,
 231        TokenType.FOLLOWING,
 232        TokenType.FORMAT,
 233        TokenType.FULL,
 234        TokenType.IF,
 235        TokenType.IS,
 236        TokenType.ISNULL,
 237        TokenType.INTERVAL,
 238        TokenType.KEEP,
 239        TokenType.LAZY,
 240        TokenType.LEADING,
 241        TokenType.LEFT,
 242        TokenType.LOCAL,
 243        TokenType.MATERIALIZED,
 244        TokenType.MERGE,
 245        TokenType.NATURAL,
 246        TokenType.NEXT,
 247        TokenType.OFFSET,
 248        TokenType.ONLY,
 249        TokenType.OPTIONS,
 250        TokenType.ORDINALITY,
 251        TokenType.OVERWRITE,
 252        TokenType.PARTITION,
 253        TokenType.PERCENT,
 254        TokenType.PIVOT,
 255        TokenType.PRAGMA,
 256        TokenType.PRECEDING,
 257        TokenType.RANGE,
 258        TokenType.REFERENCES,
 259        TokenType.RIGHT,
 260        TokenType.ROW,
 261        TokenType.ROWS,
 262        TokenType.SEED,
 263        TokenType.SEMI,
 264        TokenType.SET,
 265        TokenType.SETTINGS,
 266        TokenType.SHOW,
 267        TokenType.SORTKEY,
 268        TokenType.TEMPORARY,
 269        TokenType.TOP,
 270        TokenType.TRAILING,
 271        TokenType.TRUE,
 272        TokenType.UNBOUNDED,
 273        TokenType.UNIQUE,
 274        TokenType.UNLOGGED,
 275        TokenType.UNPIVOT,
 276        TokenType.VOLATILE,
 277        TokenType.WINDOW,
 278        *CREATABLES,
 279        *SUBQUERY_PREDICATES,
 280        *TYPE_TOKENS,
 281        *NO_PAREN_FUNCTIONS,
 282    }
 283
 284    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 285
 286    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 287        TokenType.APPLY,
 288        TokenType.FULL,
 289        TokenType.LEFT,
 290        TokenType.NATURAL,
 291        TokenType.OFFSET,
 292        TokenType.RIGHT,
 293        TokenType.WINDOW,
 294    }
 295
 296    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 297
 298    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 299
 300    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 301
 302    FUNC_TOKENS = {
 303        TokenType.COMMAND,
 304        TokenType.CURRENT_DATE,
 305        TokenType.CURRENT_DATETIME,
 306        TokenType.CURRENT_TIMESTAMP,
 307        TokenType.CURRENT_TIME,
 308        TokenType.CURRENT_USER,
 309        TokenType.FILTER,
 310        TokenType.FIRST,
 311        TokenType.FORMAT,
 312        TokenType.GLOB,
 313        TokenType.IDENTIFIER,
 314        TokenType.INDEX,
 315        TokenType.ISNULL,
 316        TokenType.ILIKE,
 317        TokenType.LIKE,
 318        TokenType.MERGE,
 319        TokenType.OFFSET,
 320        TokenType.PRIMARY_KEY,
 321        TokenType.RANGE,
 322        TokenType.REPLACE,
 323        TokenType.ROW,
 324        TokenType.UNNEST,
 325        TokenType.VAR,
 326        TokenType.LEFT,
 327        TokenType.RIGHT,
 328        TokenType.DATE,
 329        TokenType.DATETIME,
 330        TokenType.TABLE,
 331        TokenType.TIMESTAMP,
 332        TokenType.TIMESTAMPTZ,
 333        TokenType.WINDOW,
 334        *TYPE_TOKENS,
 335        *SUBQUERY_PREDICATES,
 336    }
 337
 338    CONJUNCTION = {
 339        TokenType.AND: exp.And,
 340        TokenType.OR: exp.Or,
 341    }
 342
 343    EQUALITY = {
 344        TokenType.EQ: exp.EQ,
 345        TokenType.NEQ: exp.NEQ,
 346        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 347    }
 348
 349    COMPARISON = {
 350        TokenType.GT: exp.GT,
 351        TokenType.GTE: exp.GTE,
 352        TokenType.LT: exp.LT,
 353        TokenType.LTE: exp.LTE,
 354    }
 355
 356    BITWISE = {
 357        TokenType.AMP: exp.BitwiseAnd,
 358        TokenType.CARET: exp.BitwiseXor,
 359        TokenType.PIPE: exp.BitwiseOr,
 360        TokenType.DPIPE: exp.DPipe,
 361    }
 362
 363    TERM = {
 364        TokenType.DASH: exp.Sub,
 365        TokenType.PLUS: exp.Add,
 366        TokenType.MOD: exp.Mod,
 367        TokenType.COLLATE: exp.Collate,
 368    }
 369
 370    FACTOR = {
 371        TokenType.DIV: exp.IntDiv,
 372        TokenType.LR_ARROW: exp.Distance,
 373        TokenType.SLASH: exp.Div,
 374        TokenType.STAR: exp.Mul,
 375    }
 376
 377    TIMESTAMPS = {
 378        TokenType.TIME,
 379        TokenType.TIMESTAMP,
 380        TokenType.TIMESTAMPTZ,
 381        TokenType.TIMESTAMPLTZ,
 382    }
 383
 384    SET_OPERATIONS = {
 385        TokenType.UNION,
 386        TokenType.INTERSECT,
 387        TokenType.EXCEPT,
 388    }
 389
 390    JOIN_SIDES = {
 391        TokenType.LEFT,
 392        TokenType.RIGHT,
 393        TokenType.FULL,
 394    }
 395
 396    JOIN_KINDS = {
 397        TokenType.INNER,
 398        TokenType.OUTER,
 399        TokenType.CROSS,
 400        TokenType.SEMI,
 401        TokenType.ANTI,
 402    }
 403
 404    LAMBDAS = {
 405        TokenType.ARROW: lambda self, expressions: self.expression(
 406            exp.Lambda,
 407            this=self._replace_lambda(
 408                self._parse_conjunction(),
 409                {node.name for node in expressions},
 410            ),
 411            expressions=expressions,
 412        ),
 413        TokenType.FARROW: lambda self, expressions: self.expression(
 414            exp.Kwarg,
 415            this=exp.Var(this=expressions[0].name),
 416            expression=self._parse_conjunction(),
 417        ),
 418    }
 419
 420    COLUMN_OPERATORS = {
 421        TokenType.DOT: None,
 422        TokenType.DCOLON: lambda self, this, to: self.expression(
 423            exp.Cast if self.STRICT_CAST else exp.TryCast,
 424            this=this,
 425            to=to,
 426        ),
 427        TokenType.ARROW: lambda self, this, path: self.expression(
 428            exp.JSONExtract,
 429            this=this,
 430            expression=path,
 431        ),
 432        TokenType.DARROW: lambda self, this, path: self.expression(
 433            exp.JSONExtractScalar,
 434            this=this,
 435            expression=path,
 436        ),
 437        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 438            exp.JSONBExtract,
 439            this=this,
 440            expression=path,
 441        ),
 442        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 443            exp.JSONBExtractScalar,
 444            this=this,
 445            expression=path,
 446        ),
 447        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 448            exp.JSONBContains,
 449            this=this,
 450            expression=key,
 451        ),
 452    }
 453
 454    EXPRESSION_PARSERS = {
 455        exp.Column: lambda self: self._parse_column(),
 456        exp.DataType: lambda self: self._parse_types(),
 457        exp.From: lambda self: self._parse_from(),
 458        exp.Group: lambda self: self._parse_group(),
 459        exp.Identifier: lambda self: self._parse_id_var(),
 460        exp.Lateral: lambda self: self._parse_lateral(),
 461        exp.Join: lambda self: self._parse_join(),
 462        exp.Order: lambda self: self._parse_order(),
 463        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 464        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 465        exp.Lambda: lambda self: self._parse_lambda(),
 466        exp.Limit: lambda self: self._parse_limit(),
 467        exp.Offset: lambda self: self._parse_offset(),
 468        exp.TableAlias: lambda self: self._parse_table_alias(),
 469        exp.Table: lambda self: self._parse_table(),
 470        exp.Condition: lambda self: self._parse_conjunction(),
 471        exp.Expression: lambda self: self._parse_statement(),
 472        exp.Properties: lambda self: self._parse_properties(),
 473        exp.Where: lambda self: self._parse_where(),
 474        exp.Ordered: lambda self: self._parse_ordered(),
 475        exp.Having: lambda self: self._parse_having(),
 476        exp.With: lambda self: self._parse_with(),
 477        exp.Window: lambda self: self._parse_named_window(),
 478        exp.Qualify: lambda self: self._parse_qualify(),
 479        exp.Returning: lambda self: self._parse_returning(),
 480        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 481    }
 482
 483    STATEMENT_PARSERS = {
 484        TokenType.ALTER: lambda self: self._parse_alter(),
 485        TokenType.BEGIN: lambda self: self._parse_transaction(),
 486        TokenType.CACHE: lambda self: self._parse_cache(),
 487        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 488        TokenType.COMMENT: lambda self: self._parse_comment(),
 489        TokenType.CREATE: lambda self: self._parse_create(),
 490        TokenType.DELETE: lambda self: self._parse_delete(),
 491        TokenType.DESC: lambda self: self._parse_describe(),
 492        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 493        TokenType.DROP: lambda self: self._parse_drop(),
 494        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 495        TokenType.INSERT: lambda self: self._parse_insert(),
 496        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 497        TokenType.MERGE: lambda self: self._parse_merge(),
 498        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 499        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 500        TokenType.SET: lambda self: self._parse_set(),
 501        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 502        TokenType.UPDATE: lambda self: self._parse_update(),
 503        TokenType.USE: lambda self: self.expression(
 504            exp.Use,
 505            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 506            and exp.Var(this=self._prev.text),
 507            this=self._parse_table(schema=False),
 508        ),
 509    }
 510
 511    UNARY_PARSERS = {
 512        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 513        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 514        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 515        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 516    }
 517
 518    PRIMARY_PARSERS = {
 519        TokenType.STRING: lambda self, token: self.expression(
 520            exp.Literal, this=token.text, is_string=True
 521        ),
 522        TokenType.NUMBER: lambda self, token: self.expression(
 523            exp.Literal, this=token.text, is_string=False
 524        ),
 525        TokenType.STAR: lambda self, _: self.expression(
 526            exp.Star,
 527            **{"except": self._parse_except(), "replace": self._parse_replace()},
 528        ),
 529        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 530        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 531        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 532        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 533        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 534        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 535        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 536        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 537        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 538    }
 539
 540    PLACEHOLDER_PARSERS = {
 541        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 542        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 543        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 544        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 545        else None,
 546    }
 547
 548    RANGE_PARSERS = {
 549        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 550        TokenType.GLOB: binary_range_parser(exp.Glob),
 551        TokenType.ILIKE: binary_range_parser(exp.ILike),
 552        TokenType.IN: lambda self, this: self._parse_in(this),
 553        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 554        TokenType.IS: lambda self, this: self._parse_is(this),
 555        TokenType.LIKE: binary_range_parser(exp.Like),
 556        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 557        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 558        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 559    }
 560
 561    PROPERTY_PARSERS = {
 562        "AFTER": lambda self: self._parse_afterjournal(
 563            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 564        ),
 565        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 566        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 567        "BEFORE": lambda self: self._parse_journal(
 568            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 569        ),
 570        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 571        "CHARACTER SET": lambda self: self._parse_character_set(),
 572        "CHECKSUM": lambda self: self._parse_checksum(),
 573        "CLUSTER BY": lambda self: self.expression(
 574            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 575        ),
 576        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 577        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 578        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 579            default=self._prev.text.upper() == "DEFAULT"
 580        ),
 581        "DEFINER": lambda self: self._parse_definer(),
 582        "DETERMINISTIC": lambda self: self.expression(
 583            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 584        ),
 585        "DISTKEY": lambda self: self._parse_distkey(),
 586        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 587        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 588        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 589        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 590        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 591        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 592        "FREESPACE": lambda self: self._parse_freespace(),
 593        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 594        "IMMUTABLE": lambda self: self.expression(
 595            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 596        ),
 597        "JOURNAL": lambda self: self._parse_journal(
 598            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 599        ),
 600        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 601        "LIKE": lambda self: self._parse_create_like(),
 602        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 603        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 604        "LOCK": lambda self: self._parse_locking(),
 605        "LOCKING": lambda self: self._parse_locking(),
 606        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 607        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 608        "MAX": lambda self: self._parse_datablocksize(),
 609        "MAXIMUM": lambda self: self._parse_datablocksize(),
 610        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 611            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 612        ),
 613        "MIN": lambda self: self._parse_datablocksize(),
 614        "MINIMUM": lambda self: self._parse_datablocksize(),
 615        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 616        "NO": lambda self: self._parse_noprimaryindex(),
 617        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 618        "ON": lambda self: self._parse_oncommit(),
 619        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 620        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 621        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 622        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 623        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 624        "RETURNS": lambda self: self._parse_returns(),
 625        "ROW": lambda self: self._parse_row(),
 626        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 627        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 628        "SETTINGS": lambda self: self.expression(
 629            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 630        ),
 631        "SORTKEY": lambda self: self._parse_sortkey(),
 632        "STABLE": lambda self: self.expression(
 633            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 634        ),
 635        "STORED": lambda self: self._parse_stored(),
 636        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 637        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 638        "TEMP": lambda self: self._parse_temporary(global_=False),
 639        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 640        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 641        "TTL": lambda self: self._parse_ttl(),
 642        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 643        "VOLATILE": lambda self: self._parse_volatile_property(),
 644        "WITH": lambda self: self._parse_with_property(),
 645    }
 646
 647    CONSTRAINT_PARSERS = {
 648        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 649        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 650        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 651        "CHARACTER SET": lambda self: self.expression(
 652            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 653        ),
 654        "CHECK": lambda self: self.expression(
 655            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 656        ),
 657        "COLLATE": lambda self: self.expression(
 658            exp.CollateColumnConstraint, this=self._parse_var()
 659        ),
 660        "COMMENT": lambda self: self.expression(
 661            exp.CommentColumnConstraint, this=self._parse_string()
 662        ),
 663        "COMPRESS": lambda self: self._parse_compress(),
 664        "DEFAULT": lambda self: self.expression(
 665            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 666        ),
 667        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 668        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 669        "FORMAT": lambda self: self.expression(
 670            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 671        ),
 672        "GENERATED": lambda self: self._parse_generated_as_identity(),
 673        "IDENTITY": lambda self: self._parse_auto_increment(),
 674        "INLINE": lambda self: self._parse_inline(),
 675        "LIKE": lambda self: self._parse_create_like(),
 676        "NOT": lambda self: self._parse_not_constraint(),
 677        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 678        "ON": lambda self: self._match(TokenType.UPDATE)
 679        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 680        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 681        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 682        "REFERENCES": lambda self: self._parse_references(match=False),
 683        "TITLE": lambda self: self.expression(
 684            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 685        ),
 686        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 687        "UNIQUE": lambda self: self._parse_unique(),
 688        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 689    }
 690
 691    ALTER_PARSERS = {
 692        "ADD": lambda self: self._parse_alter_table_add(),
 693        "ALTER": lambda self: self._parse_alter_table_alter(),
 694        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 695        "DROP": lambda self: self._parse_alter_table_drop(),
 696        "RENAME": lambda self: self._parse_alter_table_rename(),
 697    }
 698
 699    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 700
 701    NO_PAREN_FUNCTION_PARSERS = {
 702        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 703        TokenType.CASE: lambda self: self._parse_case(),
 704        TokenType.IF: lambda self: self._parse_if(),
 705        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 706            exp.NextValueFor,
 707            this=self._parse_column(),
 708            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 709        ),
 710    }
 711
 712    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 713        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 714        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 715        "DECODE": lambda self: self._parse_decode(),
 716        "EXTRACT": lambda self: self._parse_extract(),
 717        "JSON_OBJECT": lambda self: self._parse_json_object(),
 718        "LOG": lambda self: self._parse_logarithm(),
 719        "MATCH": lambda self: self._parse_match_against(),
 720        "OPENJSON": lambda self: self._parse_open_json(),
 721        "POSITION": lambda self: self._parse_position(),
 722        "STRING_AGG": lambda self: self._parse_string_agg(),
 723        "SUBSTRING": lambda self: self._parse_substring(),
 724        "TRIM": lambda self: self._parse_trim(),
 725        "TRY_CAST": lambda self: self._parse_cast(False),
 726        "TRY_CONVERT": lambda self: self._parse_convert(False),
 727    }
 728
    # Parsers for each clause that can modify a query, keyed by the arg name
    # the parsed result is stored under. "joins"/"laterals" are collected
    # repeatedly until their sub-parser returns None (via two-arg iter()).
    QUERY_MODIFIER_PARSERS = {
        "joins": lambda self: list(iter(self._parse_join, None)),
        "laterals": lambda self: list(iter(self._parse_lateral, None)),
        "match": lambda self: self._parse_match_recognize(),
        "where": lambda self: self._parse_where(),
        "group": lambda self: self._parse_group(),
        "having": lambda self: self._parse_having(),
        "qualify": lambda self: self._parse_qualify(),
        "windows": lambda self: self._parse_window_clause(),
        "order": lambda self: self._parse_order(),
        "limit": lambda self: self._parse_limit(),
        "offset": lambda self: self._parse_offset(),
        "lock": lambda self: self._parse_lock(),
        "sample": lambda self: self._parse_table_sample(as_modifier=True),
    }

    # SET statement parsers, keyed by the keyword following SET. A trie over
    # these keys (_set_trie) is built by the _Parser metaclass.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # SHOW statement parsers; empty in the base class, populated by dialects.
    # A trie over these keys (_show_trie) is built by the _Parser metaclass.
    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    # Dialect-specific parsers for typed literals, keyed by data type.
    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}

    # Expression types that query modifiers (WHERE, LIMIT, ...) may attach to.
    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)

    # Accepted transaction kinds (e.g. BEGIN IMMEDIATE TRANSACTION).
    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}

    # Phrases accepted as SET TRANSACTION characteristics.
    TRANSACTION_CHARACTERISTICS = {
        "ISOLATION LEVEL REPEATABLE READ",
        "ISOLATION LEVEL READ COMMITTED",
        "ISOLATION LEVEL READ UNCOMMITTED",
        "ISOLATION LEVEL SERIALIZABLE",
        "READ WRITE",
        "READ ONLY",
    }

    # Conflict-resolution keywords for INSERT OR <alternative>.
    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    # Accepted kinds inside a CLONE ... (<kind> => ...) clause.
    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}

    # Tokens usable as a window alias; ROWS is excluded, presumably to avoid
    # ambiguity with the window frame spec -- confirm against dialects.
    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}

    # Tokens that may introduce an ADD ... constraint in ALTER statements.
    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}

    # Dialect behavior flags, overridden by subclasses:
    # whether CAST(...) has strict (failing) semantics; passed to _parse_convert.
    STRICT_CAST = True

    # whether CONVERT takes the target type as its first argument.
    CONVERT_TYPE_FIRST = False

    # PIVOT column naming / string-identifier quirks.
    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    # LOG(...) argument order and single-argument behavior.
    LOG_BASE_FIRST = True
    LOG_DEFAULTS_TO_LN = False

    # Fixed attribute set: parser configuration plus per-parse cursor state.
    __slots__ = (
        "error_level",
        "error_message_context",
        "sql",
        "errors",
        "index_offset",
        "unnest_column_only",
        "alias_post_tablesample",
        "max_errors",
        "null_ordering",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_show_trie",
        "_set_trie",
    )
 807
 808    def __init__(
 809        self,
 810        error_level: t.Optional[ErrorLevel] = None,
 811        error_message_context: int = 100,
 812        index_offset: int = 0,
 813        unnest_column_only: bool = False,
 814        alias_post_tablesample: bool = False,
 815        max_errors: int = 3,
 816        null_ordering: t.Optional[str] = None,
 817    ):
 818        self.error_level = error_level or ErrorLevel.IMMEDIATE
 819        self.error_message_context = error_message_context
 820        self.index_offset = index_offset
 821        self.unnest_column_only = unnest_column_only
 822        self.alias_post_tablesample = alias_post_tablesample
 823        self.max_errors = max_errors
 824        self.null_ordering = null_ordering
 825        self.reset()
 826
 827    def reset(self):
 828        self.sql = ""
 829        self.errors = []
 830        self._tokens = []
 831        self._index = 0
 832        self._curr = None
 833        self._next = None
 834        self._prev = None
 835        self._prev_comments = None
 836
 837    def parse(
 838        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 839    ) -> t.List[t.Optional[exp.Expression]]:
 840        """
 841        Parses a list of tokens and returns a list of syntax trees, one tree
 842        per parsed SQL statement.
 843
 844        Args:
 845            raw_tokens: the list of tokens.
 846            sql: the original SQL string, used to produce helpful debug messages.
 847
 848        Returns:
 849            The list of syntax trees.
 850        """
 851        return self._parse(
 852            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 853        )
 854
 855    def parse_into(
 856        self,
 857        expression_types: exp.IntoType,
 858        raw_tokens: t.List[Token],
 859        sql: t.Optional[str] = None,
 860    ) -> t.List[t.Optional[exp.Expression]]:
 861        """
 862        Parses a list of tokens into a given Expression type. If a collection of Expression
 863        types is given instead, this method will try to parse the token list into each one
 864        of them, stopping at the first for which the parsing succeeds.
 865
 866        Args:
 867            expression_types: the expression type(s) to try and parse the token list into.
 868            raw_tokens: the list of tokens.
 869            sql: the original SQL string, used to produce helpful debug messages.
 870
 871        Returns:
 872            The target Expression.
 873        """
 874        errors = []
 875        for expression_type in ensure_collection(expression_types):
 876            parser = self.EXPRESSION_PARSERS.get(expression_type)
 877            if not parser:
 878                raise TypeError(f"No parser registered for {expression_type}")
 879            try:
 880                return self._parse(parser, raw_tokens, sql)
 881            except ParseError as e:
 882                e.errors[0]["into_expression"] = expression_type
 883                errors.append(e)
 884        raise ParseError(
 885            f"Failed to parse into {expression_types}",
 886            errors=merge_errors(errors),
 887        ) from errors[-1]
 888
    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Core parse loop: split the token stream on semicolons and apply
        `parse_method` to each statement's tokens, returning one tree per chunk.
        """
        self.reset()
        self.sql = sql or ""
        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        # Split tokens into one chunk per statement. A trailing semicolon does
        # not open a new empty chunk; semicolon tokens themselves are dropped.
        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()  # position the cursor on the chunk's first token

            expressions.append(parse_method(self))

            # Leftover tokens mean the statement wasn't fully consumed.
            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            # Log or raise accumulated errors per the configured error level.
            self.check_errors()

        return expressions
 922
 923    def check_errors(self) -> None:
 924        """
 925        Logs or raises any found errors, depending on the chosen error level setting.
 926        """
 927        if self.error_level == ErrorLevel.WARN:
 928            for error in self.errors:
 929                logger.error(str(error))
 930        elif self.error_level == ErrorLevel.RAISE and self.errors:
 931            raise ParseError(
 932                concat_messages(self.errors, self.max_errors),
 933                errors=merge_errors(self.errors),
 934            )
 935
 936    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 937        """
 938        Appends an error in the list of recorded errors or raises it, depending on the chosen
 939        error level setting.
 940        """
 941        token = token or self._curr or self._prev or Token.string("")
 942        start = token.start
 943        end = token.end
 944        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 945        highlight = self.sql[start:end]
 946        end_context = self.sql[end : end + self.error_message_context]
 947
 948        error = ParseError.new(
 949            f"{message}. Line {token.line}, Col: {token.col}.\n"
 950            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 951            description=message,
 952            line=token.line,
 953            col=token.col,
 954            start_context=start_context,
 955            highlight=highlight,
 956            end_context=end_context,
 957        )
 958
 959        if self.error_level == ErrorLevel.IMMEDIATE:
 960            raise error
 961
 962        self.errors.append(error)
 963
 964    def expression(
 965        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 966    ) -> E:
 967        """
 968        Creates a new, validated Expression.
 969
 970        Args:
 971            exp_class: the expression class to instantiate.
 972            comments: an optional list of comments to attach to the expression.
 973            kwargs: the arguments to set for the expression along with their respective values.
 974
 975        Returns:
 976            The target expression.
 977        """
 978        instance = exp_class(**kwargs)
 979        instance.add_comments(comments) if comments else self._add_comments(instance)
 980        self.validate_expression(instance)
 981        return instance
 982
 983    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 984        if expression and self._prev_comments:
 985            expression.add_comments(self._prev_comments)
 986            self._prev_comments = None
 987
 988    def validate_expression(
 989        self, expression: exp.Expression, args: t.Optional[t.List] = None
 990    ) -> None:
 991        """
 992        Validates an already instantiated expression, making sure that all its mandatory arguments
 993        are set.
 994
 995        Args:
 996            expression: the expression to validate.
 997            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 998        """
 999        if self.error_level == ErrorLevel.IGNORE:
1000            return
1001
1002        for error_message in expression.error_messages(args):
1003            self.raise_error(error_message)
1004
1005    def _find_sql(self, start: Token, end: Token) -> str:
1006        return self.sql[start.start : end.end]
1007
1008    def _advance(self, times: int = 1) -> None:
1009        self._index += times
1010        self._curr = seq_get(self._tokens, self._index)
1011        self._next = seq_get(self._tokens, self._index + 1)
1012        if self._index > 0:
1013            self._prev = self._tokens[self._index - 1]
1014            self._prev_comments = self._prev.comments
1015        else:
1016            self._prev = None
1017            self._prev_comments = None
1018
1019    def _retreat(self, index: int) -> None:
1020        if index != self._index:
1021            self._advance(index - self._index)
1022
1023    def _parse_command(self) -> exp.Command:
1024        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1025
1026    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1027        start = self._prev
1028        exists = self._parse_exists() if allow_exists else None
1029
1030        self._match(TokenType.ON)
1031
1032        kind = self._match_set(self.CREATABLES) and self._prev
1033
1034        if not kind:
1035            return self._parse_as_command(start)
1036
1037        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1038            this = self._parse_user_defined_function(kind=kind.token_type)
1039        elif kind.token_type == TokenType.TABLE:
1040            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1041        elif kind.token_type == TokenType.COLUMN:
1042            this = self._parse_column()
1043        else:
1044            this = self._parse_id_var()
1045
1046        self._match(TokenType.IS)
1047
1048        return self.expression(
1049            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1050        )
1051
1052    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        """
        Parse a ClickHouse MergeTree TTL clause (see the URL above):
        TTL expr [action], ... [WHERE ...] [GROUP BY ... [SET ...]]
        """

        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One `expr [DELETE | RECOMPRESS expr | TO DISK 'x' | TO VOLUME 'x']`
            # element of the comma-separated TTL list.
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            # Bare TTL expression with no explicit action keyword.
            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # GROUP BY ... SET <assignments>: aggregation applied to expired rows
        # (only meaningful when a GROUP BY was parsed).
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )
1089
1090    def _parse_statement(self) -> t.Optional[exp.Expression]:
1091        if self._curr is None:
1092            return None
1093
1094        if self._match_set(self.STATEMENT_PARSERS):
1095            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1096
1097        if self._match_set(Tokenizer.COMMANDS):
1098            return self._parse_command()
1099
1100        expression = self._parse_expression()
1101        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1102        return self._parse_query_modifiers(expression)
1103
1104    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1105        start = self._prev
1106        temporary = self._match(TokenType.TEMPORARY)
1107        materialized = self._match(TokenType.MATERIALIZED)
1108        kind = self._match_set(self.CREATABLES) and self._prev.text
1109        if not kind:
1110            return self._parse_as_command(start)
1111
1112        return self.expression(
1113            exp.Drop,
1114            exists=self._parse_exists(),
1115            this=self._parse_table(schema=True),
1116            kind=kind,
1117            temporary=temporary,
1118            materialized=materialized,
1119            cascade=self._match(TokenType.CASCADE),
1120            constraints=self._match_text_seq("CONSTRAINTS"),
1121            purge=self._match_text_seq("PURGE"),
1122        )
1123
1124    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
1125        return (
1126            self._match(TokenType.IF)
1127            and (not not_ or self._match(TokenType.NOT))
1128            and self._match(TokenType.EXISTS)
1129        )
1130
1131    def _parse_create(self) -> t.Optional[exp.Expression]:
1132        start = self._prev
1133        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1134            TokenType.OR, TokenType.REPLACE
1135        )
1136        unique = self._match(TokenType.UNIQUE)
1137
1138        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1139            self._match(TokenType.TABLE)
1140
1141        properties = None
1142        create_token = self._match_set(self.CREATABLES) and self._prev
1143
1144        if not create_token:
1145            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1146            create_token = self._match_set(self.CREATABLES) and self._prev
1147
1148            if not properties or not create_token:
1149                return self._parse_as_command(start)
1150
1151        exists = self._parse_exists(not_=True)
1152        this = None
1153        expression = None
1154        indexes = None
1155        no_schema_binding = None
1156        begin = None
1157        clone = None
1158
1159        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1160            this = self._parse_user_defined_function(kind=create_token.token_type)
1161            temp_properties = self._parse_properties()
1162            if properties and temp_properties:
1163                properties.expressions.extend(temp_properties.expressions)
1164            elif temp_properties:
1165                properties = temp_properties
1166
1167            self._match(TokenType.ALIAS)
1168            begin = self._match(TokenType.BEGIN)
1169            return_ = self._match_text_seq("RETURN")
1170            expression = self._parse_statement()
1171
1172            if return_:
1173                expression = self.expression(exp.Return, this=expression)
1174        elif create_token.token_type == TokenType.INDEX:
1175            this = self._parse_index()
1176        elif create_token.token_type in self.DB_CREATABLES:
1177            table_parts = self._parse_table_parts(schema=True)
1178
1179            # exp.Properties.Location.POST_NAME
1180            if self._match(TokenType.COMMA):
1181                temp_properties = self._parse_properties(before=True)
1182                if properties and temp_properties:
1183                    properties.expressions.extend(temp_properties.expressions)
1184                elif temp_properties:
1185                    properties = temp_properties
1186
1187            this = self._parse_schema(this=table_parts)
1188
1189            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1190            temp_properties = self._parse_properties()
1191            if properties and temp_properties:
1192                properties.expressions.extend(temp_properties.expressions)
1193            elif temp_properties:
1194                properties = temp_properties
1195
1196            self._match(TokenType.ALIAS)
1197
1198            # exp.Properties.Location.POST_ALIAS
1199            if not (
1200                self._match(TokenType.SELECT, advance=False)
1201                or self._match(TokenType.WITH, advance=False)
1202                or self._match(TokenType.L_PAREN, advance=False)
1203            ):
1204                temp_properties = self._parse_properties()
1205                if properties and temp_properties:
1206                    properties.expressions.extend(temp_properties.expressions)
1207                elif temp_properties:
1208                    properties = temp_properties
1209
1210            expression = self._parse_ddl_select()
1211
1212            if create_token.token_type == TokenType.TABLE:
1213                # exp.Properties.Location.POST_EXPRESSION
1214                temp_properties = self._parse_properties()
1215                if properties and temp_properties:
1216                    properties.expressions.extend(temp_properties.expressions)
1217                elif temp_properties:
1218                    properties = temp_properties
1219
1220                indexes = []
1221                while True:
1222                    index = self._parse_create_table_index()
1223
1224                    # exp.Properties.Location.POST_INDEX
1225                    if self._match(TokenType.PARTITION_BY, advance=False):
1226                        temp_properties = self._parse_properties()
1227                        if properties and temp_properties:
1228                            properties.expressions.extend(temp_properties.expressions)
1229                        elif temp_properties:
1230                            properties = temp_properties
1231
1232                    if not index:
1233                        break
1234                    else:
1235                        indexes.append(index)
1236            elif create_token.token_type == TokenType.VIEW:
1237                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1238                    no_schema_binding = True
1239
1240            if self._match_text_seq("CLONE"):
1241                clone = self._parse_table(schema=True)
1242                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1243                clone_kind = (
1244                    self._match(TokenType.L_PAREN)
1245                    and self._match_texts(self.CLONE_KINDS)
1246                    and self._prev.text.upper()
1247                )
1248                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1249                self._match(TokenType.R_PAREN)
1250                clone = self.expression(
1251                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1252                )
1253
1254        return self.expression(
1255            exp.Create,
1256            this=this,
1257            kind=create_token.text,
1258            replace=replace,
1259            unique=unique,
1260            expression=expression,
1261            exists=exists,
1262            properties=properties,
1263            indexes=indexes,
1264            no_schema_binding=no_schema_binding,
1265            begin=begin,
1266            clone=clone,
1267        )
1268
1269    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1270        self._match(TokenType.COMMA)
1271
1272        # parsers look to _prev for no/dual/default, so need to consume first
1273        self._match_text_seq("NO")
1274        self._match_text_seq("DUAL")
1275        self._match_text_seq("DEFAULT")
1276
1277        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1278            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1279
1280        return None
1281
1282    def _parse_property(self) -> t.Optional[exp.Expression]:
1283        if self._match_texts(self.PROPERTY_PARSERS):
1284            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1285
1286        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1287            return self._parse_character_set(default=True)
1288
1289        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1290            return self._parse_sortkey(compound=True)
1291
1292        if self._match_text_seq("SQL", "SECURITY"):
1293            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1294
1295        assignment = self._match_pair(
1296            TokenType.VAR, TokenType.EQ, advance=False
1297        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1298
1299        if assignment:
1300            key = self._parse_var_or_string()
1301            self._match(TokenType.EQ)
1302            return self.expression(exp.Property, this=key, value=self._parse_column())
1303
1304        return None
1305
1306    def _parse_stored(self) -> exp.Expression:
1307        self._match(TokenType.ALIAS)
1308
1309        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1310        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1311
1312        return self.expression(
1313            exp.FileFormatProperty,
1314            this=self.expression(
1315                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1316            )
1317            if input_format or output_format
1318            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1319        )
1320
1321    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1322        self._match(TokenType.EQ)
1323        self._match(TokenType.ALIAS)
1324        return self.expression(exp_class, this=self._parse_field())
1325
1326    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1327        properties = []
1328
1329        while True:
1330            if before:
1331                identified_property = self._parse_property_before()
1332            else:
1333                identified_property = self._parse_property()
1334
1335            if not identified_property:
1336                break
1337            for p in ensure_list(identified_property):
1338                properties.append(p)
1339
1340        if properties:
1341            return self.expression(exp.Properties, expressions=properties)
1342
1343        return None
1344
1345    def _parse_fallback(self, no=False) -> exp.Expression:
1346        self._match_text_seq("FALLBACK")
1347        return self.expression(
1348            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
1349        )
1350
1351    def _parse_volatile_property(self) -> exp.Expression:
1352        if self._index >= 2:
1353            pre_volatile_token = self._tokens[self._index - 2]
1354        else:
1355            pre_volatile_token = None
1356
1357        if pre_volatile_token and pre_volatile_token.token_type in (
1358            TokenType.CREATE,
1359            TokenType.REPLACE,
1360            TokenType.UNIQUE,
1361        ):
1362            return exp.VolatileProperty()
1363
1364        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1365
1366    def _parse_with_property(
1367        self,
1368    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1369        self._match(TokenType.WITH)
1370        if self._match(TokenType.L_PAREN, advance=False):
1371            return self._parse_wrapped_csv(self._parse_property)
1372
1373        if self._match_text_seq("JOURNAL"):
1374            return self._parse_withjournaltable()
1375
1376        if self._match_text_seq("DATA"):
1377            return self._parse_withdata(no=False)
1378        elif self._match_text_seq("NO", "DATA"):
1379            return self._parse_withdata(no=True)
1380
1381        if not self._next:
1382            return None
1383
1384        return self._parse_withisolatedloading()
1385
1386    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1387    def _parse_definer(self) -> t.Optional[exp.Expression]:
1388        self._match(TokenType.EQ)
1389
1390        user = self._parse_id_var()
1391        self._match(TokenType.PARAMETER)
1392        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1393
1394        if not user or not host:
1395            return None
1396
1397        return exp.DefinerProperty(this=f"{user}@{host}")
1398
1399    def _parse_withjournaltable(self) -> exp.Expression:
1400        self._match(TokenType.TABLE)
1401        self._match(TokenType.EQ)
1402        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1403
1404    def _parse_log(self, no=False) -> exp.Expression:
1405        self._match_text_seq("LOG")
1406        return self.expression(exp.LogProperty, no=no)
1407
1408    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
1409        before = self._match_text_seq("BEFORE")
1410        self._match_text_seq("JOURNAL")
1411        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1412
1413    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
1414        self._match_text_seq("NOT")
1415        self._match_text_seq("LOCAL")
1416        self._match_text_seq("AFTER", "JOURNAL")
1417        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1418
1419    def _parse_checksum(self) -> exp.Expression:
1420        self._match_text_seq("CHECKSUM")
1421        self._match(TokenType.EQ)
1422
1423        on = None
1424        if self._match(TokenType.ON):
1425            on = True
1426        elif self._match_text_seq("OFF"):
1427            on = False
1428        default = self._match(TokenType.DEFAULT)
1429
1430        return self.expression(
1431            exp.ChecksumProperty,
1432            on=on,
1433            default=default,
1434        )
1435
1436    def _parse_freespace(self) -> exp.Expression:
1437        self._match_text_seq("FREESPACE")
1438        self._match(TokenType.EQ)
1439        return self.expression(
1440            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1441        )
1442
1443    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1444        self._match_text_seq("MERGEBLOCKRATIO")
1445        if self._match(TokenType.EQ):
1446            return self.expression(
1447                exp.MergeBlockRatioProperty,
1448                this=self._parse_number(),
1449                percent=self._match(TokenType.PERCENT),
1450            )
1451        else:
1452            return self.expression(
1453                exp.MergeBlockRatioProperty,
1454                no=no,
1455                default=default,
1456            )
1457
1458    def _parse_datablocksize(self, default=None) -> exp.Expression:
1459        if default:
1460            self._match_text_seq("DATABLOCKSIZE")
1461            return self.expression(exp.DataBlocksizeProperty, default=True)
1462        elif self._match_texts(("MIN", "MINIMUM")):
1463            self._match_text_seq("DATABLOCKSIZE")
1464            return self.expression(exp.DataBlocksizeProperty, min=True)
1465        elif self._match_texts(("MAX", "MAXIMUM")):
1466            self._match_text_seq("DATABLOCKSIZE")
1467            return self.expression(exp.DataBlocksizeProperty, min=False)
1468
1469        self._match_text_seq("DATABLOCKSIZE")
1470        self._match(TokenType.EQ)
1471        size = self._parse_number()
1472        units = None
1473        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1474            units = self._prev.text
1475        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1476
1477    def _parse_blockcompression(self) -> exp.Expression:
1478        self._match_text_seq("BLOCKCOMPRESSION")
1479        self._match(TokenType.EQ)
1480        always = self._match_text_seq("ALWAYS")
1481        manual = self._match_text_seq("MANUAL")
1482        never = self._match_text_seq("NEVER")
1483        default = self._match_text_seq("DEFAULT")
1484        autotemp = None
1485        if self._match_text_seq("AUTOTEMP"):
1486            autotemp = self._parse_schema()
1487
1488        return self.expression(
1489            exp.BlockCompressionProperty,
1490            always=always,
1491            manual=manual,
1492            never=never,
1493            default=default,
1494            autotemp=autotemp,
1495        )
1496
1497    def _parse_withisolatedloading(self) -> exp.Expression:
1498        no = self._match_text_seq("NO")
1499        concurrent = self._match_text_seq("CONCURRENT")
1500        self._match_text_seq("ISOLATED", "LOADING")
1501        for_all = self._match_text_seq("FOR", "ALL")
1502        for_insert = self._match_text_seq("FOR", "INSERT")
1503        for_none = self._match_text_seq("FOR", "NONE")
1504        return self.expression(
1505            exp.IsolatedLoadingProperty,
1506            no=no,
1507            concurrent=concurrent,
1508            for_all=for_all,
1509            for_insert=for_insert,
1510            for_none=for_none,
1511        )
1512
1513    def _parse_locking(self) -> exp.Expression:
1514        if self._match(TokenType.TABLE):
1515            kind = "TABLE"
1516        elif self._match(TokenType.VIEW):
1517            kind = "VIEW"
1518        elif self._match(TokenType.ROW):
1519            kind = "ROW"
1520        elif self._match_text_seq("DATABASE"):
1521            kind = "DATABASE"
1522        else:
1523            kind = None
1524
1525        if kind in ("DATABASE", "TABLE", "VIEW"):
1526            this = self._parse_table_parts()
1527        else:
1528            this = None
1529
1530        if self._match(TokenType.FOR):
1531            for_or_in = "FOR"
1532        elif self._match(TokenType.IN):
1533            for_or_in = "IN"
1534        else:
1535            for_or_in = None
1536
1537        if self._match_text_seq("ACCESS"):
1538            lock_type = "ACCESS"
1539        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1540            lock_type = "EXCLUSIVE"
1541        elif self._match_text_seq("SHARE"):
1542            lock_type = "SHARE"
1543        elif self._match_text_seq("READ"):
1544            lock_type = "READ"
1545        elif self._match_text_seq("WRITE"):
1546            lock_type = "WRITE"
1547        elif self._match_text_seq("CHECKSUM"):
1548            lock_type = "CHECKSUM"
1549        else:
1550            lock_type = None
1551
1552        override = self._match_text_seq("OVERRIDE")
1553
1554        return self.expression(
1555            exp.LockingProperty,
1556            this=this,
1557            kind=kind,
1558            for_or_in=for_or_in,
1559            lock_type=lock_type,
1560            override=override,
1561        )
1562
1563    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1564        if self._match(TokenType.PARTITION_BY):
1565            return self._parse_csv(self._parse_conjunction)
1566        return []
1567
1568    def _parse_partitioned_by(self) -> exp.Expression:
1569        self._match(TokenType.EQ)
1570        return self.expression(
1571            exp.PartitionedByProperty,
1572            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1573        )
1574
1575    def _parse_withdata(self, no=False) -> exp.Expression:
1576        if self._match_text_seq("AND", "STATISTICS"):
1577            statistics = True
1578        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1579            statistics = False
1580        else:
1581            statistics = None
1582
1583        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1584
    def _parse_noprimaryindex(self) -> exp.Expression:
        """Parse NO PRIMARY INDEX; the leading NO was consumed by the caller."""
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()
1588
    def _parse_oncommit(self) -> exp.Expression:
        """Consume the remaining COMMIT PRESERVE ROWS tokens and build the property."""
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()
1592
1593    def _parse_distkey(self) -> exp.Expression:
1594        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1595
1596    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1597        table = self._parse_table(schema=True)
1598        options = []
1599        while self._match_texts(("INCLUDING", "EXCLUDING")):
1600            this = self._prev.text.upper()
1601            id_var = self._parse_id_var()
1602
1603            if not id_var:
1604                return None
1605
1606            options.append(
1607                self.expression(
1608                    exp.Property,
1609                    this=this,
1610                    value=exp.Var(this=id_var.this.upper()),
1611                )
1612            )
1613        return self.expression(exp.LikeProperty, this=table, expressions=options)
1614
1615    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1616        return self.expression(
1617            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1618        )
1619
1620    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1621        self._match(TokenType.EQ)
1622        return self.expression(
1623            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1624        )
1625
1626    def _parse_returns(self) -> exp.Expression:
1627        value: t.Optional[exp.Expression]
1628        is_table = self._match(TokenType.TABLE)
1629
1630        if is_table:
1631            if self._match(TokenType.LT):
1632                value = self.expression(
1633                    exp.Schema,
1634                    this="TABLE",
1635                    expressions=self._parse_csv(self._parse_struct_types),
1636                )
1637                if not self._match(TokenType.GT):
1638                    self.raise_error("Expecting >")
1639            else:
1640                value = self._parse_schema(exp.Var(this="TABLE"))
1641        else:
1642            value = self._parse_types()
1643
1644        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1645
    def _parse_temporary(self, global_=False) -> exp.Expression:
        """Parse [GLOBAL] TEMPORARY; ``global_`` is True when invoked after GLOBAL."""
        self._match(TokenType.TEMPORARY)  # optional here: already implied when called from "GLOBAL"
        return self.expression(exp.TemporaryProperty, global_=global_)
1649
1650    def _parse_describe(self) -> exp.Expression:
1651        kind = self._match_set(self.CREATABLES) and self._prev.text
1652        this = self._parse_table()
1653
1654        return self.expression(exp.Describe, this=this, kind=kind)
1655
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement (the INSERT token was already consumed).

        Handles both ``INSERT [OVERWRITE] [LOCAL] DIRECTORY ...`` and the regular
        ``INSERT [OR <alternative>] [INTO] [TABLE] <table> ...`` forms.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY '<path>': the target is a directory, not a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT OR <alternative> (e.g. REPLACE/IGNORE per INSERT_ALTERNATIVES).
            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)  # both INTO and TABLE are optional keywords
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        # Keyword argument order matters: each value below consumes tokens in turn.
        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1687
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... following an INSERT.

        Returns None when neither clause is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a list of key values.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        # Action: DO NOTHING, or [DO] UPDATE SET <assignments>.
        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1721
1722    def _parse_returning(self) -> t.Optional[exp.Expression]:
1723        if not self._match(TokenType.RETURNING):
1724            return None
1725
1726        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1727
1728    def _parse_row(self) -> t.Optional[exp.Expression]:
1729        if not self._match(TokenType.FORMAT):
1730            return None
1731        return self._parse_row_format()
1732
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a ROW FORMAT SERDE '<name>' or ROW FORMAT DELIMITED ... property.

        When ``match_row`` is True the leading ROW FORMAT tokens are required;
        otherwise None is returned without consuming anything.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        # Every DELIMITED sub-clause is optional; collect whichever appear, in order.
        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            # ESCAPED BY is only meaningful directly after FIELDS TERMINATED BY.
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1758
    def _parse_load_data(self) -> exp.Expression:
        """Parse LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE <table> ...."""
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        # Keyword argument order matters: table, partition, INPUTFORMAT and SERDE
        # are consumed from the token stream in this exact sequence.
        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1776
1777    def _parse_delete(self) -> exp.Expression:
1778        self._match(TokenType.FROM)
1779
1780        return self.expression(
1781            exp.Delete,
1782            this=self._parse_table(),
1783            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1784            where=self._parse_where(),
1785            returning=self._parse_returning(),
1786        )
1787
1788    def _parse_update(self) -> exp.Expression:
1789        return self.expression(
1790            exp.Update,
1791            **{  # type: ignore
1792                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
1793                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
1794                "from": self._parse_from(modifiers=True),
1795                "where": self._parse_where(),
1796                "returning": self._parse_returning(),
1797            },
1798        )
1799
1800    def _parse_uncache(self) -> exp.Expression:
1801        if not self._match(TokenType.TABLE):
1802            self.raise_error("Expecting TABLE after UNCACHE")
1803
1804        return self.expression(
1805            exp.Uncache,
1806            exists=self._parse_exists(),
1807            this=self._parse_table(schema=True),
1808        )
1809
1810    def _parse_cache(self) -> exp.Expression:
1811        lazy = self._match(TokenType.LAZY)
1812        self._match(TokenType.TABLE)
1813        table = self._parse_table(schema=True)
1814        options = []
1815
1816        if self._match(TokenType.OPTIONS):
1817            self._match_l_paren()
1818            k = self._parse_string()
1819            self._match(TokenType.EQ)
1820            v = self._parse_string()
1821            options = [k, v]
1822            self._match_r_paren()
1823
1824        self._match(TokenType.ALIAS)
1825        return self.expression(
1826            exp.Cache,
1827            this=table,
1828            lazy=lazy,
1829            options=options,
1830            expression=self._parse_select(nested=True),
1831        )
1832
1833    def _parse_partition(self) -> t.Optional[exp.Expression]:
1834        if not self._match(TokenType.PARTITION):
1835            return None
1836
1837        return self.expression(
1838            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1839        )
1840
1841    def _parse_value(self) -> exp.Expression:
1842        if self._match(TokenType.L_PAREN):
1843            expressions = self._parse_csv(self._parse_conjunction)
1844            self._match_r_paren()
1845            return self.expression(exp.Tuple, expressions=expressions)
1846
1847        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1848        # Source: https://prestodb.io/docs/current/sql/values.html
1849        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1850
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement, a parenthesized query/table, or a VALUES list.

        Args:
            nested: allow a parenthesized nested SELECT.
            table: allow a parenthesized table expression instead of a SELECT.
            parse_subquery_alias: whether to parse an alias after a subquery.

        Returns:
            The parsed expression (possibly wrapped in set operations), or None.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte  # only reached when raise_error does not raise (lenient error level)

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                # DISTINCT may carry an ON (<exprs>) target list.
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            limit = self._parse_limit(top=True)  # limit appearing right after SELECT (top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1930
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH clause and its comma-separated CTEs.

        Args:
            skip_with_token: when True, assume the WITH token was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are normally comma-separated, but a repeated WITH keyword is
            # also tolerated as a separator (with or without a preceding comma).
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1950
1951    def _parse_cte(self) -> exp.Expression:
1952        alias = self._parse_table_alias()
1953        if not alias or not alias.this:
1954            self.raise_error("Expected CTE to have alias")
1955
1956        self._match(TokenType.ALIAS)
1957
1958        return self.expression(
1959            exp.CTE,
1960            this=self._parse_wrapped(self._parse_statement),
1961            alias=alias,
1962        )
1963
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse an optional table alias: ``[AS] <name> [(col1, col2, ...)]``.

        Returns None when neither an alias name nor a column list is found.
        """
        any_token = self._match(TokenType.ALIAS)  # an explicit AS permits any token as the alias
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # If the parens held no columns, rewind: the "(" belongs to something else.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        return self.expression(exp.TableAlias, this=alias, columns=columns)
1984
1985    def _parse_subquery(
1986        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1987    ) -> exp.Expression:
1988        return self.expression(
1989            exp.Subquery,
1990            this=this,
1991            pivots=self._parse_pivots(),
1992            alias=self._parse_table_alias() if parse_alias else None,
1993        )
1994
1995    def _parse_query_modifiers(
1996        self, this: t.Optional[exp.Expression]
1997    ) -> t.Optional[exp.Expression]:
1998        if isinstance(this, self.MODIFIABLES):
1999            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2000                expression = parser(self)
2001
2002                if expression:
2003                    this.set(key, expression)
2004        return this
2005
2006    def _parse_hint(self) -> t.Optional[exp.Expression]:
2007        if self._match(TokenType.HINT):
2008            hints = self._parse_csv(self._parse_function)
2009            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2010                self.raise_error("Expected */ after HINT")
2011            return self.expression(exp.Hint, expressions=hints)
2012
2013        return None
2014
2015    def _parse_into(self) -> t.Optional[exp.Expression]:
2016        if not self._match(TokenType.INTO):
2017            return None
2018
2019        temp = self._match(TokenType.TEMPORARY)
2020        unlogged = self._match(TokenType.UNLOGGED)
2021        self._match(TokenType.TABLE)
2022
2023        return self.expression(
2024            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2025        )
2026
2027    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2028        if not self._match(TokenType.FROM):
2029            return None
2030
2031        comments = self._prev_comments
2032        this = self._parse_table()
2033
2034        return self.expression(
2035            exp.From,
2036            comments=comments,
2037            this=self._parse_query_modifiers(this) if modifiers else this,
2038        )
2039
2040    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
2041        if not self._match(TokenType.MATCH_RECOGNIZE):
2042            return None
2043
2044        self._match_l_paren()
2045
2046        partition = self._parse_partition_by()
2047        order = self._parse_order()
2048        measures = (
2049            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
2050        )
2051
2052        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
2053            rows = exp.Var(this="ONE ROW PER MATCH")
2054        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
2055            text = "ALL ROWS PER MATCH"
2056            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
2057                text += f" SHOW EMPTY MATCHES"
2058            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
2059                text += f" OMIT EMPTY MATCHES"
2060            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
2061                text += f" WITH UNMATCHED ROWS"
2062            rows = exp.Var(this=text)
2063        else:
2064            rows = None
2065
2066        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
2067            text = "AFTER MATCH SKIP"
2068            if self._match_text_seq("PAST", "LAST", "ROW"):
2069                text += f" PAST LAST ROW"
2070            elif self._match_text_seq("TO", "NEXT", "ROW"):
2071                text += f" TO NEXT ROW"
2072            elif self._match_text_seq("TO", "FIRST"):
2073                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
2074            elif self._match_text_seq("TO", "LAST"):
2075                text += f" TO LAST {self._advance_any().text}"  # type: ignore
2076            after = exp.Var(this=text)
2077        else:
2078            after = None
2079
2080        if self._match_text_seq("PATTERN"):
2081            self._match_l_paren()
2082
2083            if not self._curr:
2084                self.raise_error("Expecting )", self._curr)
2085
2086            paren = 1
2087            start = self._curr
2088
2089            while self._curr and paren > 0:
2090                if self._curr.token_type == TokenType.L_PAREN:
2091                    paren += 1
2092                if self._curr.token_type == TokenType.R_PAREN:
2093                    paren -= 1
2094                end = self._prev
2095                self._advance()
2096            if paren > 0:
2097                self.raise_error("Expecting )", self._curr)
2098            pattern = exp.Var(this=self._find_sql(start, end))
2099        else:
2100            pattern = None
2101
2102        define = (
2103            self._parse_csv(
2104                lambda: self.expression(
2105                    exp.Alias,
2106                    alias=self._parse_id_var(any_token=True),
2107                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
2108                )
2109            )
2110            if self._match_text_seq("DEFINE")
2111            else None
2112        )
2113
2114        self._match_r_paren()
2115
2116        return self.expression(
2117            exp.MatchRecognize,
2118            partition_by=partition,
2119            order=order,
2120            measures=measures,
2121            rows=rows,
2122            after=after,
2123            pattern=pattern,
2124            define=define,
2125            alias=self._parse_table_alias(),
2126        )
2127
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse a LATERAL / CROSS APPLY / OUTER APPLY table expression.

        Returns None when none of these introducers is present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply  # OUTER APPLY is represented as an outer lateral
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # LATERAL VIEW <expr> <table> [AS col1, col2, ...]
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2169
2170    def _parse_join_side_and_kind(
2171        self,
2172    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2173        return (
2174            self._match(TokenType.NATURAL) and self._prev,
2175            self._match_set(self.JOIN_SIDES) and self._prev,
2176            self._match_set(self.JOIN_KINDS) and self._prev,
2177        )
2178
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause (including comma joins and CROSS/OUTER APPLY).

        Args:
            skip_join_token: when True, the JOIN keyword itself is not required.
        """
        if self._match(TokenType.COMMA):
            # Implicit (comma) join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # The matched modifier tokens were not part of a join after all; rewind.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            side = Token(TokenType.LEFT, "LEFT")  # OUTER APPLY is given a LEFT join side

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # Join condition: either ON <conjunction> or USING (<columns>).
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2222
2223    def _parse_index(self) -> exp.Expression:
2224        index = self._parse_id_var()
2225        self._match(TokenType.ON)
2226        self._match(TokenType.TABLE)  # hive
2227
2228        return self.expression(
2229            exp.Index,
2230            this=index,
2231            table=self.expression(exp.Table, this=self._parse_id_var()),
2232            columns=self._parse_expression(),
2233        )
2234
2235    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2236        unique = self._match(TokenType.UNIQUE)
2237        primary = self._match_text_seq("PRIMARY")
2238        amp = self._match_text_seq("AMP")
2239        if not self._match(TokenType.INDEX):
2240            return None
2241        index = self._parse_id_var()
2242        columns = None
2243        if self._match(TokenType.L_PAREN, advance=False):
2244            columns = self._parse_wrapped_csv(self._parse_column)
2245        return self.expression(
2246            exp.Index,
2247            this=index,
2248            columns=columns,
2249            unique=unique,
2250            primary=primary,
2251            amp=amp,
2252        )
2253
2254    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2255        return (
2256            (not schema and self._parse_function())
2257            or self._parse_id_var(any_token=False)
2258            or self._parse_string_as_identifier()
2259            or self._parse_placeholder()
2260        )
2261
    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
        """Parse a dotted name into exp.Table (catalog.db.table, then nested dots).

        Raises a parse error when no table name can be parsed.
        """
        catalog = None
        db = None
        table = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                # Shift parts left: previous table becomes db, db becomes catalog.
                catalog = db
                db = table
                table = self._parse_table_part(schema=schema)

        if not table:
            self.raise_error(f"Expected table name but got {self._curr}")

        return self.expression(
            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
        )
2284
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table expression: lateral/unnest/values/subquery or a plain table.

        Args:
            schema: parse the table as a schema definition (column list allowed).
            alias_tokens: tokens permitted as alias names (defaults to TABLE_ALIAS_TOKENS).
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects place the alias after the TABLESAMPLE clause; the flag
        # decides whether the sample is parsed before or after the alias.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # WITH (<hint>, ...) table hints following the table name.
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        # A sample wraps the table node it applies to.
        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2336
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse UNNEST(...) [WITH ORDINALITY] [alias] [WITH OFFSET [AS] name]."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # In column-only mode the alias names the produced column, not the
            # table, so move the name into the column list.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # Default column name "offset" when no explicit identifier follows.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2363
2364    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2365        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2366        if not is_derived and not self._match(TokenType.VALUES):
2367            return None
2368
2369        expressions = self._parse_csv(self._parse_value)
2370
2371        if is_derived:
2372            self._match_r_paren()
2373
2374        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2375
2376    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2377        if not self._match(TokenType.TABLE_SAMPLE) and not (
2378            as_modifier and self._match_text_seq("USING", "SAMPLE")
2379        ):
2380            return None
2381
2382        bucket_numerator = None
2383        bucket_denominator = None
2384        bucket_field = None
2385        percent = None
2386        rows = None
2387        size = None
2388        seed = None
2389
2390        kind = (
2391            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2392        )
2393        method = self._parse_var(tokens=(TokenType.ROW,))
2394
2395        self._match(TokenType.L_PAREN)
2396
2397        num = self._parse_number()
2398
2399        if self._match(TokenType.BUCKET):
2400            bucket_numerator = self._parse_number()
2401            self._match(TokenType.OUT_OF)
2402            bucket_denominator = bucket_denominator = self._parse_number()
2403            self._match(TokenType.ON)
2404            bucket_field = self._parse_field()
2405        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2406            percent = num
2407        elif self._match(TokenType.ROWS):
2408            rows = num
2409        else:
2410            size = num
2411
2412        self._match(TokenType.R_PAREN)
2413
2414        if self._match(TokenType.L_PAREN):
2415            method = self._parse_var()
2416            seed = self._match(TokenType.COMMA) and self._parse_number()
2417            self._match_r_paren()
2418        elif self._match_texts(("SEED", "REPEATABLE")):
2419            seed = self._parse_wrapped(self._parse_number)
2420
2421        return self.expression(
2422            exp.TableSample,
2423            method=method,
2424            bucket_numerator=bucket_numerator,
2425            bucket_denominator=bucket_denominator,
2426            bucket_field=bucket_field,
2427            percent=percent,
2428            rows=rows,
2429            size=size,
2430            seed=seed,
2431            kind=kind,
2432        )
2433
2434    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2435        return list(iter(self._parse_pivot, None))
2436
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse a single PIVOT or UNPIVOT clause into an `exp.Pivot` node.

        Returns None (after rewinding) when the tokens don't form a pivot clause.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause (e.g. PIVOT used as an identifier) - rewind.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        # The IN (...) list of pivoted values.
        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only the last pivot in a chain may carry a table alias.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            # Precompute the output column names by combining each aggregation's
            # name with each pivoted field value; prefix/suffix order and quoting
            # are dialect-controlled via the class-level flags below.
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2496
2497    def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]:
2498        return [agg.alias for agg in pivot_columns]
2499
2500    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2501        if not skip_where_token and not self._match(TokenType.WHERE):
2502            return None
2503
2504        return self.expression(
2505            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2506        )
2507
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and
        WITH TOTALS.

        Args:
            skip_group_by_token: when True, assume GROUP BY was already consumed.

        Returns:
            An `exp.Group` expression, or None when there is no GROUP BY here.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        # Loop because plain expressions and the special constructs
        # (grouping sets / rollup / cube / totals) can be interleaved.
        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # A leading WITH means the bare-keyword form (WITH ROLLUP / WITH CUBE),
            # which carries no explicit column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2544
2545    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2546        if not self._match(TokenType.GROUPING_SETS):
2547            return None
2548
2549        return self._parse_wrapped_csv(self._parse_grouping_set)
2550
2551    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2552        if self._match(TokenType.L_PAREN):
2553            grouping_set = self._parse_csv(self._parse_column)
2554            self._match_r_paren()
2555            return self.expression(exp.Tuple, expressions=grouping_set)
2556
2557        return self._parse_column()
2558
2559    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2560        if not skip_having_token and not self._match(TokenType.HAVING):
2561            return None
2562        return self.expression(exp.Having, this=self._parse_conjunction())
2563
2564    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2565        if not self._match(TokenType.QUALIFY):
2566            return None
2567        return self.expression(exp.Qualify, this=self._parse_conjunction())
2568
2569    def _parse_order(
2570        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2571    ) -> t.Optional[exp.Expression]:
2572        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2573            return this
2574
2575        return self.expression(
2576            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2577        )
2578
2579    def _parse_sort(
2580        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2581    ) -> t.Optional[exp.Expression]:
2582        if not self._match(token_type):
2583            return None
2584        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2585
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY item and normalize its ASC/DESC and NULLS ordering.

        When no explicit NULLS FIRST/LAST is given, `nulls_first` is derived from
        the dialect's `null_ordering` setting so the ordering can be transpiled
        faithfully between dialects with different defaults.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        # _match returns a truthy value or None; normalize to plain booleans.
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2607
2608    def _parse_limit(
2609        self, this: t.Optional[exp.Expression] = None, top: bool = False
2610    ) -> t.Optional[exp.Expression]:
2611        if self._match(TokenType.TOP if top else TokenType.LIMIT):
2612            limit_paren = self._match(TokenType.L_PAREN)
2613            limit_exp = self.expression(
2614                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
2615            )
2616
2617            if limit_paren:
2618                self._match_r_paren()
2619
2620            return limit_exp
2621
2622        if self._match(TokenType.FETCH):
2623            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
2624            direction = self._prev.text if direction else "FIRST"
2625
2626            count = self._parse_number()
2627            percent = self._match(TokenType.PERCENT)
2628
2629            self._match_set((TokenType.ROW, TokenType.ROWS))
2630
2631            only = self._match(TokenType.ONLY)
2632            with_ties = self._match_text_seq("WITH", "TIES")
2633
2634            if only and with_ties:
2635                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")
2636
2637            return self.expression(
2638                exp.Fetch,
2639                direction=direction,
2640                count=count,
2641                percent=percent,
2642                with_ties=with_ties,
2643            )
2644
2645        return this
2646
2647    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2648        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2649            return this
2650
2651        count = self._parse_number()
2652        self._match_set((TokenType.ROW, TokenType.ROWS))
2653        return self.expression(exp.Offset, this=this, expression=count)
2654
2655    def _parse_lock(self) -> t.Optional[exp.Expression]:
2656        if self._match_text_seq("FOR", "UPDATE"):
2657            return self.expression(exp.Lock, update=True)
2658        if self._match_text_seq("FOR", "SHARE"):
2659            return self.expression(exp.Lock, update=False)
2660
2661        return None
2662
2663    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2664        if not self._match_set(self.SET_OPERATIONS):
2665            return this
2666
2667        token_type = self._prev.token_type
2668
2669        if token_type == TokenType.UNION:
2670            expression = exp.Union
2671        elif token_type == TokenType.EXCEPT:
2672            expression = exp.Except
2673        else:
2674            expression = exp.Intersect
2675
2676        return self.expression(
2677            expression,
2678            this=this,
2679            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
2680            expression=self._parse_set_operations(self._parse_select(nested=True)),
2681        )
2682
2683    def _parse_expression(self, explicit_alias: bool = False) -> t.Optional[exp.Expression]:
2684        return self._parse_alias(self._parse_conjunction(), explicit=explicit_alias)
2685
2686    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2687        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2688
2689    def _parse_equality(self) -> t.Optional[exp.Expression]:
2690        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2691
2692    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2693        return self._parse_tokens(self._parse_range, self.COMPARISON)
2694
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN/IN/LIKE/... via `RANGE_PARSERS`),
        their [NOT] variants, the ISNULL/NOTNULL shorthands and IS expressions."""
        this = self._parse_bitwise()
        # A NOT here negates the following predicate, e.g. x NOT BETWEEN a AND b.
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                # The dialect-specific parser declined; keep the plain expression.
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        # Apply the earlier NOT after the predicate has been built.
        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2721
2722    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2723        index = self._index - 1
2724        negate = self._match(TokenType.NOT)
2725        if self._match(TokenType.DISTINCT_FROM):
2726            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
2727            return self.expression(klass, this=this, expression=self._parse_expression())
2728
2729        expression = self._parse_null() or self._parse_boolean()
2730        if not expression:
2731            self._retreat(index)
2732            return None
2733
2734        this = self.expression(exp.Is, this=this, expression=expression)
2735        return self.expression(exp.Not, this=this) if negate else this
2736
2737    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
2738        unnest = self._parse_unnest()
2739        if unnest:
2740            this = self.expression(exp.In, this=this, unnest=unnest)
2741        elif self._match(TokenType.L_PAREN):
2742            expressions = self._parse_csv(self._parse_select_or_expression)
2743
2744            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
2745                this = self.expression(exp.In, this=this, query=expressions[0])
2746            else:
2747                this = self.expression(exp.In, this=this, expressions=expressions)
2748
2749            self._match_r_paren(this)
2750        else:
2751            this = self.expression(exp.In, this=this, field=self._parse_field())
2752
2753        return this
2754
2755    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2756        low = self._parse_bitwise()
2757        self._match(TokenType.AND)
2758        high = self._parse_bitwise()
2759        return self.expression(exp.Between, this=this, low=low, high=high)
2760
2761    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2762        if not self._match(TokenType.ESCAPE):
2763            return this
2764        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2765
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing literals toward the canonical
        `INTERVAL '<value>' <unit>` form where possible."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                # Canonicalize INTERVAL 5 day -> INTERVAL '5' day.
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split a combined literal like '5 day' into value + unit.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2786
2787    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2788        this = self._parse_term()
2789
2790        while True:
2791            if self._match_set(self.BITWISE):
2792                this = self.expression(
2793                    self.BITWISE[self._prev.token_type],
2794                    this=this,
2795                    expression=self._parse_term(),
2796                )
2797            elif self._match_pair(TokenType.LT, TokenType.LT):
2798                this = self.expression(
2799                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2800                )
2801            elif self._match_pair(TokenType.GT, TokenType.GT):
2802                this = self.expression(
2803                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2804                )
2805            else:
2806                break
2807
2808        return this
2809
2810    def _parse_term(self) -> t.Optional[exp.Expression]:
2811        return self._parse_tokens(self._parse_factor, self.TERM)
2812
2813    def _parse_factor(self) -> t.Optional[exp.Expression]:
2814        return self._parse_tokens(self._parse_unary, self.FACTOR)
2815
2816    def _parse_unary(self) -> t.Optional[exp.Expression]:
2817        if self._match_set(self.UNARY_PARSERS):
2818            return self.UNARY_PARSERS[self._prev.token_type](self)
2819        return self._parse_at_time_zone(self._parse_type())
2820
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an expression that may start with a type: an interval, a cast-style
        literal (e.g. DATE '2020-01-01'), a bare data type, or a plain column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # TYPE 'literal' - delegate to a dialect-specific literal parser
                # when one exists, otherwise treat it as a cast.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare type name followed by something else was likely an
                # identifier after all - rewind and reparse as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2842
2843    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2844        this = self._parse_type()
2845        if not this:
2846            return None
2847
2848        return self.expression(
2849            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2850        )
2851
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested (ARRAY/MAP/STRUCT), sized, array-bracket
        and timezone-qualified forms.

        Args:
            check_func: when True, TYPE(...) that isn't followed by a string literal
                is assumed to be a function call; the parser rewinds and returns None.

        Returns:
            An `exp.DataType` (or related) expression, or None after rewinding.
        """
        index = self._index

        # Optional SYSUDTLIB. prefix before the type name.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                # Not a parenthesized type after all - rewind.
                self._retreat(index)
                return None

            # TYPE(...) could still turn out to be a function call (see check_func).
            maybe_func = True

        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            # Bracketed array types: INT[], INT[][] etc. - wrap once per [].
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' means this was a subscript, not a type - rewind.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Generic-style nesting: ARRAY<INT>, STRUCT<a: INT>, ...
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values following the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # Once a timezone variant is resolved it can't be a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No trailing string literal, so TYPE(...) was a function call.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2964
2965    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2966        this = self._parse_type() or self._parse_id_var()
2967        self._match(TokenType.COLON)
2968        return self._parse_column_def(this)
2969
2970    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2971        if not self._match(TokenType.AT_TIME_ZONE):
2972            return this
2973        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2974
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, applying column operators
        such as dot qualification, `::` casts, and bracket subscripts."""
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # Postgres-style cast: expr::type
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Dialect-specific operator (e.g. JSON extraction) with a literal operand.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifier chain over: table -> db -> catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3027
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, a leading-dot number, or a
        parenthesized expression / subquery / tuple."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are concatenated into one Concat node.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Numbers like .5 tokenize as DOT NUMBER.
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            # Capture comments on the '(' before further parsing advances the stream.
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(lambda: self._parse_expression(explicit_alias=True))

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3071
3072    def _parse_field(
3073        self,
3074        any_token: bool = False,
3075        tokens: t.Optional[t.Collection[TokenType]] = None,
3076    ) -> t.Optional[exp.Expression]:
3077        return (
3078            self._parse_primary()
3079            or self._parse_function()
3080            or self._parse_id_var(any_token=any_token, tokens=tokens)
3081        )
3082
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Args:
            functions: optional name -> builder overrides; defaults to `self.FUNCTIONS`.
            anonymous: when True, skip dialect-specific parsers/builders and build
                an `exp.Anonymous` call instead.

        Returns:
            The parsed function expression (possibly wrapped by a window spec),
            or None when the current tokens don't form a function call.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            # e.g. EXISTS(SELECT ...) / ANY(WITH ... SELECT ...)
            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                # Unknown function names become Anonymous calls.
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        return self._parse_window(this)
3134
3135    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3136        return self._parse_column_def(self._parse_id_var())
3137
    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly dotted) UDF name with an optional parameter list.

        Returns just the name expression when no parenthesized parameter list
        follows.  `kind` is accepted for dialect overrides but unused here.
        """
        this = self._parse_id_var()

        # Fold a qualified name like db.schema.func into nested Dot nodes.
        while self._match(TokenType.DOT):
            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )
3154
3155    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3156        literal = self._parse_primary()
3157        if literal:
3158            return self.expression(exp.Introducer, this=token.text, expression=literal)
3159
3160        return self.expression(exp.Identifier, this=token.text)
3161
3162    def _parse_national(self, token: Token) -> exp.Expression:
3163        return self.expression(exp.National, this=exp.Literal.string(token.text))
3164
3165    def _parse_session_parameter(self) -> exp.Expression:
3166        kind = None
3167        this = self._parse_id_var() or self._parse_primary()
3168
3169        if this and self._match(TokenType.DOT):
3170            kind = this.name
3171            this = self._parse_var() or self._parse_primary()
3172
3173        return self.expression(exp.SessionParameter, this=this, kind=kind)
3174
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a lambda (e.g. `(x, y) -> x + y`), falling back to a regular
        select/expression (with optional DISTINCT, ORDER BY and LIMIT) when no
        lambda operator follows the candidate parameter list.
        """
        # Remember the position so we can backtrack if this isn't a lambda.
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                # Not a parenthesized parameter list after all; rewind.
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator followed: rewind and reparse as an expression.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                # Rewrite the left side of `a = b` from a Column to a Var —
                # presumably for named-argument syntax; confirm against callers.
                if isinstance(left, exp.Column):
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3206
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an optional parenthesized schema (column defs / constraints)
        attached to `this`; returns `this` unchanged when none is present.
        """
        index = self._index

        # Probe whether what follows is actually a nested SELECT; if it is,
        # this is not a schema.  The probe must not consume tokens, hence the
        # unconditional retreat in `finally`.
        try:
            if self._parse_select(nested=True):
                return this
        except Exception:
            # A parse failure here only means "not a select"; swallow it.
            pass
        finally:
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        # Each schema item is either a constraint or a column definition.
        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3227
    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a column definition: name, optional type, trailing constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this
        kind = self._parse_types()

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        # Accumulate every trailing column constraint (NOT NULL, DEFAULT, ...).
        constraints = []
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # A bare identifier with no type and no constraints stays as-is.
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3248
    def _parse_auto_increment(self) -> exp.Expression:
        """Parse AUTO_INCREMENT / IDENTITY-style options.

        Accepts both `(start, increment)` and `START ... INCREMENT ...` forms.
        When both values are present the result is a generated-identity
        constraint; otherwise a plain auto-increment constraint.
        """
        start = None
        increment = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)

        return exp.AutoIncrementColumnConstraint()
3266
3267    def _parse_compress(self) -> exp.Expression:
3268        if self._match(TokenType.L_PAREN, advance=False):
3269            return self.expression(
3270                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3271            )
3272
3273        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3274
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse `GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY`
        with an optional parenthesized sequence-options list.
        """
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            # `this=False` marks BY DEFAULT; `this=True` marks ALWAYS.
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Sequence options; each is optional and order-sensitive here.
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3304
3305    def _parse_inline(self) -> t.Optional[exp.Expression]:
3306        self._match_text_seq("LENGTH")
3307        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3308
3309    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3310        if self._match_text_seq("NULL"):
3311            return self.expression(exp.NotNullColumnConstraint)
3312        if self._match_text_seq("CASESPECIFIC"):
3313            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3314        return None
3315
3316    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3317        if self._match(TokenType.CONSTRAINT):
3318            this = self._parse_id_var()
3319        else:
3320            this = None
3321
3322        if self._match_texts(self.CONSTRAINT_PARSERS):
3323            return self.expression(
3324                exp.ColumnConstraint,
3325                this=this,
3326                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3327            )
3328
3329        return this
3330
3331    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3332        if not self._match(TokenType.CONSTRAINT):
3333            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3334
3335        this = self._parse_id_var()
3336        expressions = []
3337
3338        while True:
3339            constraint = self._parse_unnamed_constraint() or self._parse_function()
3340            if not constraint:
3341                break
3342            expressions.append(constraint)
3343
3344        return self.expression(exp.Constraint, this=this, expressions=expressions)
3345
3346    def _parse_unnamed_constraint(
3347        self, constraints: t.Optional[t.Collection[str]] = None
3348    ) -> t.Optional[exp.Expression]:
3349        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3350            return None
3351
3352        constraint = self._prev.text.upper()
3353        if constraint not in self.CONSTRAINT_PARSERS:
3354            self.raise_error(f"No parser found for schema constraint {constraint}.")
3355
3356        return self.CONSTRAINT_PARSERS[constraint](self)
3357
3358    def _parse_unique(self) -> exp.Expression:
3359        if not self._match(TokenType.L_PAREN, advance=False):
3360            return self.expression(exp.UniqueColumnConstraint)
3361        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3362
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as plain strings.

        Loops until no known option matches; each option is appended in its
        canonical upper-case spelling.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The token after ON names the event (e.g. DELETE/UPDATE).
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                break

        return options
3399
3400    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3401        if match and not self._match(TokenType.REFERENCES):
3402            return None
3403
3404        expressions = None
3405        this = self._parse_id_var()
3406
3407        if self._match(TokenType.L_PAREN, advance=False):
3408            expressions = self._parse_wrapped_id_vars()
3409
3410        options = self._parse_key_constraint_options()
3411        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3412
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse a FOREIGN KEY constraint: column list, REFERENCES clause and
        any number of `ON {DELETE | UPDATE} <action>` options.
        """
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            # The matched event becomes the option key ("delete" / "update").
            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3438
3439    def _parse_primary_key(self) -> exp.Expression:
3440        desc = (
3441            self._match_set((TokenType.ASC, TokenType.DESC))
3442            and self._prev.token_type == TokenType.DESC
3443        )
3444
3445        if not self._match(TokenType.L_PAREN, advance=False):
3446            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3447
3448        expressions = self._parse_wrapped_csv(self._parse_field)
3449        options = self._parse_key_constraint_options()
3450        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3451
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a `[...]` or `{...}` suffix on `this`: subscript, slice,
        array literal, or DuckDB struct literal.  Recurses to consume chained
        brackets like `x[0][1]`.
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon: open-ended slice like x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices to the dialect's offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        return self._parse_bracket(this)
3480
3481    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3482        if self._match(TokenType.COLON):
3483            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3484        return this
3485
    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE expression (the CASE keyword is already consumed).

        Handles both the simple form (`CASE expr WHEN ...`) and the searched
        form (`CASE WHEN cond ...`), plus an optional ELSE default.
        """
        ifs = []
        default = None

        # For the simple form this is the operand; None for the searched form.
        expression = self._parse_conjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_conjunction()
            self._match(TokenType.THEN)
            then = self._parse_conjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_conjunction()

        if not self._match(TokenType.END):
            self.raise_error("Expected END after CASE", self._prev)

        # CASE may be directly followed by an OVER clause.
        return self._parse_window(
            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
        )
3507
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call `IF(cond, a, b)` or the
        statement-like `IF cond THEN a [ELSE b] END` form.
        """
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # -1 so a failed parse rewinds to before the IF token itself.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3529
3530    def _parse_extract(self) -> exp.Expression:
3531        this = self._parse_function() or self._parse_var() or self._parse_type()
3532
3533        if self._match(TokenType.FROM):
3534            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3535
3536        if not self._match(TokenType.COMMA):
3537            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3538
3539        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3540
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST arguments: `expr AS type` (or `expr, 'type'` for the
        string-type variant).  `strict` selects Cast vs TryCast.
        """
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma form casts to a type given as a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CHAR may carry an explicit character set, e.g. CHAR CHARACTER SET utf8.
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3561
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG-style calls into a GroupConcat node, normalizing
        the Postgres ORDER BY and WITHIN GROUP variants.
        """
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3590
    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
        """Parse CONVERT arguments into a Cast/TryCast.

        Supports both `expr USING charset` and `expr, type`; some dialects put
        the type first, which CONVERT_TYPE_FIRST compensates for.
        """
        to: t.Optional[exp.Expression]
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to = self.expression(exp.CharacterSet, this=self._parse_var())
        elif self._match(TokenType.COMMA):
            to = self._parse_bitwise()
        else:
            to = None

        # Swap the argument order if needed to produce the correct AST
        if self.CONVERT_TYPE_FIRST:
            this, to = to, this

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3607
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        # Fewer than 3 args can only be the charset variant.
        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        # Build one If branch per (search, result) pair.
        ifs = []
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be compared with IS NULL, not equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search: match on equality OR both sides NULL,
                # mirroring DECODE's NULL-equals-NULL semantics.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        # A leftover odd argument is the default branch.
        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3654
3655    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3656        self._match_text_seq("KEY")
3657        key = self._parse_field()
3658        self._match(TokenType.COLON)
3659        self._match_text_seq("VALUE")
3660        value = self._parse_field()
3661        if not key and not value:
3662            return None
3663        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3664
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus the optional
        NULL-handling, UNIQUE KEYS, RETURNING, FORMAT JSON and ENCODING
        modifiers.
        """
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        # WITH/WITHOUT UNIQUE [KEYS]; None means the clause was absent.
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3695
3696    def _parse_logarithm(self) -> exp.Expression:
3697        # Default argument order is base, expression
3698        args = self._parse_csv(self._parse_range)
3699
3700        if len(args) > 1:
3701            if not self.LOG_BASE_FIRST:
3702                args.reverse()
3703            return exp.Log.from_arg_list(args)
3704
3705        return self.expression(
3706            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3707        )
3708
    def _parse_match_against(self) -> exp.Expression:
        """Parse MySQL full-text search: MATCH (cols) AGAINST (str [modifier])."""
        expressions = self._parse_csv(self._parse_column)

        # Consume the `) AGAINST (` bridge between the two argument lists.
        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
3730
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse T-SQL OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One WITH-clause column: name, type, optional path and AS JSON.
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3751
    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style arguments into a StrPosition node.

        Supports `POSITION(substr IN str)` and the comma-separated form;
        `haystack_first` flips the comma-form argument order for dialects
        that pass the haystack first.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(substr IN str) form.
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this
3772
3773    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3774        args = self._parse_csv(self._parse_table)
3775        return exp.JoinHint(this=func_name.upper(), expressions=args)
3776
3777    def _parse_substring(self) -> exp.Expression:
3778        # Postgres supports the form: substring(string [from int] [for int])
3779        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
3780
3781        args = self._parse_csv(self._parse_bitwise)
3782
3783        if self._match(TokenType.FROM):
3784            args.append(self._parse_bitwise())
3785            if self._match(TokenType.FOR):
3786                args.append(self._parse_bitwise())
3787
3788        this = exp.Substring.from_arg_list(args)
3789        self.validate_expression(this, args)
3790
3791        return this
3792
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM arguments: optional position (LEADING/TRAILING/BOTH),
        optional characters to strip, the target string, and collation.

        References:
        https://www.w3resource.com/sql/character-functions/trim.php
        https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
        """
        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: first value is the strip chars, second the target.
            this = self._parse_bitwise()
        else:
            # One-argument form: the parsed value is the target itself.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )
3820
3821    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3822        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3823
3824    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3825        return self._parse_window(self._parse_id_var(), alias=True)
3826
    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` when an IGNORE NULLS / RESPECT NULLS modifier follows."""
        if self._match(TokenType.IGNORE_NULLS):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match(TokenType.RESPECT_NULLS):
            return self.expression(exp.RespectNulls, this=this)
        return this
3835
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse everything that can trail a function call: FILTER, WITHIN
        GROUP, IGNORE/RESPECT NULLS and an OVER (...) window specification.

        With `alias=True` the window is a named one from a WINDOW clause
        (`name AS (...)`) rather than an OVER clause.
        """
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (no parentheses): reference to a named window.
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # FROM FIRST / FROM LAST modifier (e.g. for NTH_VALUE).
        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Frame specification: ROWS/RANGE BETWEEN <start> AND <end>.
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3918
3919    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
3920        self._match(TokenType.BETWEEN)
3921
3922        return {
3923            "value": (
3924                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
3925            )
3926            or self._parse_bitwise(),
3927            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
3928        }
3929
3930    def _parse_alias(
3931        self, this: t.Optional[exp.Expression], explicit: bool = False
3932    ) -> t.Optional[exp.Expression]:
3933        any_token = self._match(TokenType.ALIAS)
3934
3935        if explicit and not any_token:
3936            return this
3937
3938        if self._match(TokenType.L_PAREN):
3939            aliases = self.expression(
3940                exp.Aliases,
3941                this=this,
3942                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3943            )
3944            self._match_r_paren(aliases)
3945            return aliases
3946
3947        alias = self._parse_id_var(any_token)
3948
3949        if alias:
3950            return self.expression(exp.Alias, this=this, alias=alias)
3951
3952        return this
3953
3954    def _parse_id_var(
3955        self,
3956        any_token: bool = True,
3957        tokens: t.Optional[t.Collection[TokenType]] = None,
3958        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3959    ) -> t.Optional[exp.Expression]:
3960        identifier = self._parse_identifier()
3961
3962        if identifier:
3963            return identifier
3964
3965        prefix = ""
3966
3967        if prefix_tokens:
3968            while self._match_set(prefix_tokens):
3969                prefix += self._prev.text
3970
3971        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3972            quoted = self._prev.token_type == TokenType.STRING
3973            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3974
3975        return None
3976
3977    def _parse_string(self) -> t.Optional[exp.Expression]:
3978        if self._match(TokenType.STRING):
3979            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3980        return self._parse_placeholder()
3981
3982    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3983        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3984
3985    def _parse_number(self) -> t.Optional[exp.Expression]:
3986        if self._match(TokenType.NUMBER):
3987            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3988        return self._parse_placeholder()
3989
3990    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3991        if self._match(TokenType.IDENTIFIER):
3992            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3993        return self._parse_placeholder()
3994
3995    def _parse_var(
3996        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3997    ) -> t.Optional[exp.Expression]:
3998        if (
3999            (any_token and self._advance_any())
4000            or self._match(TokenType.VAR)
4001            or (self._match_set(tokens) if tokens else False)
4002        ):
4003            return self.expression(exp.Var, this=self._prev.text)
4004        return self._parse_placeholder()
4005
4006    def _advance_any(self) -> t.Optional[Token]:
4007        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4008            self._advance()
4009            return self._prev
4010        return None
4011
4012    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4013        return self._parse_var() or self._parse_string()
4014
4015    def _parse_null(self) -> t.Optional[exp.Expression]:
4016        if self._match(TokenType.NULL):
4017            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4018        return None
4019
4020    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4021        if self._match(TokenType.TRUE):
4022            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4023        if self._match(TokenType.FALSE):
4024            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4025        return None
4026
4027    def _parse_star(self) -> t.Optional[exp.Expression]:
4028        if self._match(TokenType.STAR):
4029            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4030        return None
4031
4032    def _parse_parameter(self) -> exp.Expression:
4033        wrapped = self._match(TokenType.L_BRACE)
4034        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4035        self._match(TokenType.R_BRACE)
4036        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4037
4038    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4039        if self._match_set(self.PLACEHOLDER_PARSERS):
4040            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4041            if placeholder:
4042                return placeholder
4043            self._advance(-1)
4044        return None
4045
4046    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4047        if not self._match(TokenType.EXCEPT):
4048            return None
4049        if self._match(TokenType.L_PAREN, advance=False):
4050            return self._parse_wrapped_csv(self._parse_column)
4051        return self._parse_csv(self._parse_column)
4052
4053    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4054        if not self._match(TokenType.REPLACE):
4055            return None
4056        if self._match(TokenType.L_PAREN, advance=False):
4057            return self._parse_wrapped_csv(self._parse_expression)
4058        return self._parse_csv(self._parse_expression)
4059
4060    def _parse_csv(
4061        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
4062    ) -> t.List[t.Optional[exp.Expression]]:
4063        parse_result = parse_method()
4064        items = [parse_result] if parse_result is not None else []
4065
4066        while self._match(sep):
4067            self._add_comments(parse_result)
4068            parse_result = parse_method()
4069            if parse_result is not None:
4070                items.append(parse_result)
4071
4072        return items
4073
4074    def _parse_tokens(
4075        self, parse_method: t.Callable, expressions: t.Dict
4076    ) -> t.Optional[exp.Expression]:
4077        this = parse_method()
4078
4079        while self._match_set(expressions):
4080            this = self.expression(
4081                expressions[self._prev.token_type],
4082                this=this,
4083                comments=self._prev_comments,
4084                expression=parse_method(),
4085            )
4086
4087        return this
4088
4089    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
4090        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4091
4092    def _parse_wrapped_csv(
4093        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
4094    ) -> t.List[t.Optional[exp.Expression]]:
4095        return self._parse_wrapped(
4096            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
4097        )
4098
4099    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4100        wrapped = self._match(TokenType.L_PAREN)
4101        if not wrapped and not optional:
4102            self.raise_error("Expecting (")
4103        parse_result = parse_method()
4104        if wrapped:
4105            self._match_r_paren()
4106        return parse_result
4107
4108    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4109        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4110
4111    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4112        return self._parse_set_operations(
4113            self._parse_select(nested=True, parse_subquery_alias=False)
4114        )
4115
4116    def _parse_transaction(self) -> exp.Expression:
4117        this = None
4118        if self._match_texts(self.TRANSACTION_KIND):
4119            this = self._prev.text
4120
4121        self._match_texts({"TRANSACTION", "WORK"})
4122
4123        modes = []
4124        while True:
4125            mode = []
4126            while self._match(TokenType.VAR):
4127                mode.append(self._prev.text)
4128
4129            if mode:
4130                modes.append(" ".join(mode))
4131            if not self._match(TokenType.COMMA):
4132                break
4133
4134        return self.expression(exp.Transaction, this=this, modes=modes)
4135
4136    def _parse_commit_or_rollback(self) -> exp.Expression:
4137        chain = None
4138        savepoint = None
4139        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4140
4141        self._match_texts({"TRANSACTION", "WORK"})
4142
4143        if self._match_text_seq("TO"):
4144            self._match_text_seq("SAVEPOINT")
4145            savepoint = self._parse_id_var()
4146
4147        if self._match(TokenType.AND):
4148            chain = not self._match_text_seq("NO")
4149            self._match_text_seq("CHAIN")
4150
4151        if is_rollback:
4152            return self.expression(exp.Rollback, savepoint=savepoint)
4153        return self.expression(exp.Commit, chain=chain)
4154
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse the ``ADD [COLUMN] ...`` action of an ALTER TABLE statement.

        Returns the column definition (with optional ``exists``/``position``
        args set), or None when the next token is not ADD.
        """
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)  # the COLUMN keyword is optional
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4175
4176    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4177        drop = self._match(TokenType.DROP) and self._parse_drop()
4178        if drop and not isinstance(drop, exp.Command):
4179            drop.set("kind", drop.args.get("kind", "COLUMN"))
4180        return drop
4181
4182    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4183    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4184        return self.expression(
4185            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4186        )
4187
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a single constraint of an ``ALTER TABLE ... ADD`` action.

        ``self._prev`` is the token that triggered this parser (CONSTRAINT,
        FOREIGN KEY or PRIMARY KEY).
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            # Named constraint: CONSTRAINT <name> [CHECK (...) [ENFORCED]]
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # FOREIGN/PRIMARY KEY may be the triggering token itself, or may
        # follow a named CONSTRAINT that had no CHECK clause.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4211
4212    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4213        index = self._index - 1
4214
4215        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4216            return self._parse_csv(self._parse_add_constraint)
4217
4218        self._retreat(index)
4219        return self._parse_csv(self._parse_add_column)
4220
    def _parse_alter_table_alter(self) -> exp.Expression:
        """Parse an ``ALTER [COLUMN] <name> ...`` action of an ALTER TABLE.

        Handles DROP DEFAULT, SET DEFAULT, and ``[SET DATA] TYPE`` with
        optional COLLATE / USING clauses.
        """
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())

        # e.g. ALTER COLUMN x [SET DATA] TYPE <type> [COLLATE ...] [USING ...];
        # each kwarg below only parses its clause when the keyword is present.
        self._match_text_seq("SET", "DATA")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._match_text_seq("TYPE") and self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_conjunction(),
        )
4238
4239    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4240        index = self._index - 1
4241
4242        partition_exists = self._parse_exists()
4243        if self._match(TokenType.PARTITION, advance=False):
4244            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4245
4246        self._retreat(index)
4247        return self._parse_csv(self._parse_drop_column)
4248
4249    def _parse_alter_table_rename(self) -> exp.Expression:
4250        self._match_text_seq("TO")
4251        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4252
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Falls back to a raw Command when the statement is not ALTER TABLE,
        when the action is unrecognized, or when an action parser leaves
        tokens unconsumed.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        # The next token names the action (ADD, DROP, ALTER, RENAME, ...).
        if self._next:
            self._advance()
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if everything was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4277
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement:

        MERGE INTO <target> USING <source> ON <condition>
        WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND <cond>] THEN <action> ...
        """
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE; False for BY TARGET or no BY clause
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # INSERT *
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # INSERT (cols) VALUES (...)
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # UPDATE *
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    # UPDATE SET col = expr, ...
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4343
4344    def _parse_show(self) -> t.Optional[exp.Expression]:
4345        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4346        if parser:
4347            return parser(self)
4348        self._advance()
4349        return self.expression(exp.Show, this=self._prev.text.upper())
4350
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a ``<name> = <value>`` / ``<name> TO <value>`` SET item.

        Args:
            kind: optional scope qualifier such as "GLOBAL" or "SESSION".

        Returns None (with the parser position restored) when no assignment
        operator follows the left-hand side.
        """
        index = self._index

        # GLOBAL/SESSION TRANSACTION has its own grammar.
        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4377
4378    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
4379        self._match_text_seq("TRANSACTION")
4380        characteristics = self._parse_csv(
4381            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
4382        )
4383        return self.expression(
4384            exp.SetItem,
4385            expressions=characteristics,
4386            kind="TRANSACTION",
4387            **{"global": global_},  # type: ignore
4388        )
4389
4390    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4391        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4392        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4393
4394    def _parse_set(self) -> exp.Expression:
4395        index = self._index
4396        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))
4397
4398        if self._curr:
4399            self._retreat(index)
4400            return self._parse_as_command(self._prev)
4401
4402        return set_
4403
4404    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4405        for option in options:
4406            if self._match_text_seq(*option.split(" ")):
4407                return exp.Var(this=option)
4408        return None
4409
4410    def _parse_as_command(self, start: Token) -> exp.Command:
4411        while self._curr:
4412            self._advance()
4413        text = self._find_sql(start, self._prev)
4414        size = len(start.text)
4415        return exp.Command(this=text[:size], expression=text[size:])
4416
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Match upcoming tokens against the keyword phrases in ``trie``.

        Returns the parser registered for a fully matched phrase, or None
        (with the parser position restored) when no phrase matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # No registered phrase starts with this token sequence.
                break
            if result == 2:
                # Complete phrase matched; look up its parser.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4439
4440    def _match(self, token_type, advance=True, expression=None):
4441        if not self._curr:
4442            return None
4443
4444        if self._curr.token_type == token_type:
4445            if advance:
4446                self._advance()
4447            self._add_comments(expression)
4448            return True
4449
4450        return None
4451
4452    def _match_set(self, types, advance=True):
4453        if not self._curr:
4454            return None
4455
4456        if self._curr.token_type in types:
4457            if advance:
4458                self._advance()
4459            return True
4460
4461        return None
4462
4463    def _match_pair(self, token_type_a, token_type_b, advance=True):
4464        if not self._curr or not self._next:
4465            return None
4466
4467        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4468            if advance:
4469                self._advance(2)
4470            return True
4471
4472        return None
4473
4474    def _match_l_paren(self, expression=None):
4475        if not self._match(TokenType.L_PAREN, expression=expression):
4476            self.raise_error("Expecting (")
4477
4478    def _match_r_paren(self, expression=None):
4479        if not self._match(TokenType.R_PAREN, expression=expression):
4480            self.raise_error("Expecting )")
4481
4482    def _match_texts(self, texts, advance=True):
4483        if self._curr and self._curr.text.upper() in texts:
4484            if advance:
4485                self._advance()
4486            return True
4487        return False
4488
4489    def _match_text_seq(self, *texts, advance=True):
4490        index = self._index
4491        for text in texts:
4492            if self._curr and self._curr.text.upper() == text:
4493                self._advance()
4494            else:
4495                self._retreat(index)
4496                return False
4497
4498        if not advance:
4499            self._retreat(index)
4500
4501        return True
4502
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Qualified columns become ``Dot(table, name)`` nodes, unqualified
        columns and identifiers become plain ``Var`` nodes; Dot children are
        rewritten in place.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4517
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite Column references to lambda parameters inside ``node``.

        Columns whose first part names a lambda variable are replaced with a
        plain identifier (or a Dot chain for qualified references). Returns
        the possibly-replaced root node.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Replace at the top of any enclosing Dot chain.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Column was not nested under a Dot chain.
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
def parse_var_map(args: Sequence) -> sqlglot.expressions.Expression:
19def parse_var_map(args: t.Sequence) -> exp.Expression:
20    if len(args) == 1 and args[0].is_star:
21        return exp.StarMap(this=args[0])
22
23    keys = []
24    values = []
25    for i in range(0, len(args), 2):
26        keys.append(args[i])
27        values.append(args[i + 1])
28    return exp.VarMap(
29        keys=exp.Array(expressions=keys),
30        values=exp.Array(expressions=values),
31    )
def parse_like(args):
34def parse_like(args):
35    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
36    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
def binary_range_parser( expr_type: Type[sqlglot.expressions.Expression]) -> Callable[[sqlglot.parser.Parser, Optional[sqlglot.expressions.Expression]], Optional[sqlglot.expressions.Expression]]:
39def binary_range_parser(
40    expr_type: t.Type[exp.Expression],
41) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
42    return lambda self, this: self._parse_escape(
43        self.expression(expr_type, this=this, expression=self._parse_bitwise())
44    )
class Parser:
  56class Parser(metaclass=_Parser):
  57    """
  58    Parser consumes a list of tokens produced by the `sqlglot.tokens.Tokenizer` and produces
  59    a parsed syntax tree.
  60
  61    Args:
  62        error_level: the desired error level.
  63            Default: ErrorLevel.IMMEDIATE
  64        error_message_context: determines the amount of context to capture from a
  65            query string when displaying the error message (in number of characters).
  66            Default: 50.
  67        index_offset: Index offset for arrays eg ARRAY[0] vs ARRAY[1] as the head of a list.
  68            Default: 0
  69        alias_post_tablesample: If the table alias comes after tablesample.
  70            Default: False
  71        max_errors: Maximum number of error messages to include in a raised ParseError.
  72            This is only relevant if error_level is ErrorLevel.RAISE.
  73            Default: 3
  74        null_ordering: Indicates the default null ordering method to use if not explicitly set.
  75            Options are "nulls_are_small", "nulls_are_large", "nulls_are_last".
  76            Default: "nulls_are_small"
  77    """
  78
  79    FUNCTIONS: t.Dict[str, t.Callable] = {
  80        **{name: f.from_arg_list for f in exp.ALL_FUNCTIONS for name in f.sql_names()},
  81        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  82            this=seq_get(args, 0),
  83            to=exp.DataType(this=exp.DataType.Type.TEXT),
  84        ),
  85        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  86        "IFNULL": exp.Coalesce.from_arg_list,
  87        "LIKE": parse_like,
  88        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  89            this=seq_get(args, 0),
  90            to=exp.DataType(this=exp.DataType.Type.TEXT),
  91        ),
  92        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  93            this=exp.Cast(
  94                this=seq_get(args, 0),
  95                to=exp.DataType(this=exp.DataType.Type.TEXT),
  96            ),
  97            start=exp.Literal.number(1),
  98            length=exp.Literal.number(10),
  99        ),
 100        "VAR_MAP": parse_var_map,
 101    }
 102
 103    NO_PAREN_FUNCTIONS = {
 104        TokenType.CURRENT_DATE: exp.CurrentDate,
 105        TokenType.CURRENT_DATETIME: exp.CurrentDate,
 106        TokenType.CURRENT_TIME: exp.CurrentTime,
 107        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
 108        TokenType.CURRENT_USER: exp.CurrentUser,
 109    }
 110
 111    JOIN_HINTS: t.Set[str] = set()
 112
 113    NESTED_TYPE_TOKENS = {
 114        TokenType.ARRAY,
 115        TokenType.MAP,
 116        TokenType.NULLABLE,
 117        TokenType.STRUCT,
 118    }
 119
 120    TYPE_TOKENS = {
 121        TokenType.BIT,
 122        TokenType.BOOLEAN,
 123        TokenType.TINYINT,
 124        TokenType.UTINYINT,
 125        TokenType.SMALLINT,
 126        TokenType.USMALLINT,
 127        TokenType.INT,
 128        TokenType.UINT,
 129        TokenType.BIGINT,
 130        TokenType.UBIGINT,
 131        TokenType.INT128,
 132        TokenType.UINT128,
 133        TokenType.INT256,
 134        TokenType.UINT256,
 135        TokenType.FLOAT,
 136        TokenType.DOUBLE,
 137        TokenType.CHAR,
 138        TokenType.NCHAR,
 139        TokenType.VARCHAR,
 140        TokenType.NVARCHAR,
 141        TokenType.TEXT,
 142        TokenType.MEDIUMTEXT,
 143        TokenType.LONGTEXT,
 144        TokenType.MEDIUMBLOB,
 145        TokenType.LONGBLOB,
 146        TokenType.BINARY,
 147        TokenType.VARBINARY,
 148        TokenType.JSON,
 149        TokenType.JSONB,
 150        TokenType.INTERVAL,
 151        TokenType.TIME,
 152        TokenType.TIMESTAMP,
 153        TokenType.TIMESTAMPTZ,
 154        TokenType.TIMESTAMPLTZ,
 155        TokenType.DATETIME,
 156        TokenType.DATETIME64,
 157        TokenType.DATE,
 158        TokenType.DECIMAL,
 159        TokenType.BIGDECIMAL,
 160        TokenType.UUID,
 161        TokenType.GEOGRAPHY,
 162        TokenType.GEOMETRY,
 163        TokenType.HLLSKETCH,
 164        TokenType.HSTORE,
 165        TokenType.PSEUDO_TYPE,
 166        TokenType.SUPER,
 167        TokenType.SERIAL,
 168        TokenType.SMALLSERIAL,
 169        TokenType.BIGSERIAL,
 170        TokenType.XML,
 171        TokenType.UNIQUEIDENTIFIER,
 172        TokenType.MONEY,
 173        TokenType.SMALLMONEY,
 174        TokenType.ROWVERSION,
 175        TokenType.IMAGE,
 176        TokenType.VARIANT,
 177        TokenType.OBJECT,
 178        TokenType.INET,
 179        *NESTED_TYPE_TOKENS,
 180    }
 181
 182    SUBQUERY_PREDICATES = {
 183        TokenType.ANY: exp.Any,
 184        TokenType.ALL: exp.All,
 185        TokenType.EXISTS: exp.Exists,
 186        TokenType.SOME: exp.Any,
 187    }
 188
 189    RESERVED_KEYWORDS = {*Tokenizer.SINGLE_TOKENS.values(), TokenType.SELECT}
 190
 191    DB_CREATABLES = {
 192        TokenType.DATABASE,
 193        TokenType.SCHEMA,
 194        TokenType.TABLE,
 195        TokenType.VIEW,
 196    }
 197
 198    CREATABLES = {
 199        TokenType.COLUMN,
 200        TokenType.FUNCTION,
 201        TokenType.INDEX,
 202        TokenType.PROCEDURE,
 203        *DB_CREATABLES,
 204    }
 205
 206    ID_VAR_TOKENS = {
 207        TokenType.VAR,
 208        TokenType.ANTI,
 209        TokenType.APPLY,
 210        TokenType.AUTO_INCREMENT,
 211        TokenType.BEGIN,
 212        TokenType.BOTH,
 213        TokenType.BUCKET,
 214        TokenType.CACHE,
 215        TokenType.CASCADE,
 216        TokenType.COLLATE,
 217        TokenType.COMMAND,
 218        TokenType.COMMENT,
 219        TokenType.COMMIT,
 220        TokenType.COMPOUND,
 221        TokenType.CONSTRAINT,
 222        TokenType.DEFAULT,
 223        TokenType.DELETE,
 224        TokenType.DESCRIBE,
 225        TokenType.DIV,
 226        TokenType.END,
 227        TokenType.EXECUTE,
 228        TokenType.ESCAPE,
 229        TokenType.FALSE,
 230        TokenType.FIRST,
 231        TokenType.FILTER,
 232        TokenType.FOLLOWING,
 233        TokenType.FORMAT,
 234        TokenType.FULL,
 235        TokenType.IF,
 236        TokenType.IS,
 237        TokenType.ISNULL,
 238        TokenType.INTERVAL,
 239        TokenType.KEEP,
 240        TokenType.LAZY,
 241        TokenType.LEADING,
 242        TokenType.LEFT,
 243        TokenType.LOCAL,
 244        TokenType.MATERIALIZED,
 245        TokenType.MERGE,
 246        TokenType.NATURAL,
 247        TokenType.NEXT,
 248        TokenType.OFFSET,
 249        TokenType.ONLY,
 250        TokenType.OPTIONS,
 251        TokenType.ORDINALITY,
 252        TokenType.OVERWRITE,
 253        TokenType.PARTITION,
 254        TokenType.PERCENT,
 255        TokenType.PIVOT,
 256        TokenType.PRAGMA,
 257        TokenType.PRECEDING,
 258        TokenType.RANGE,
 259        TokenType.REFERENCES,
 260        TokenType.RIGHT,
 261        TokenType.ROW,
 262        TokenType.ROWS,
 263        TokenType.SEED,
 264        TokenType.SEMI,
 265        TokenType.SET,
 266        TokenType.SETTINGS,
 267        TokenType.SHOW,
 268        TokenType.SORTKEY,
 269        TokenType.TEMPORARY,
 270        TokenType.TOP,
 271        TokenType.TRAILING,
 272        TokenType.TRUE,
 273        TokenType.UNBOUNDED,
 274        TokenType.UNIQUE,
 275        TokenType.UNLOGGED,
 276        TokenType.UNPIVOT,
 277        TokenType.VOLATILE,
 278        TokenType.WINDOW,
 279        *CREATABLES,
 280        *SUBQUERY_PREDICATES,
 281        *TYPE_TOKENS,
 282        *NO_PAREN_FUNCTIONS,
 283    }
 284
 285    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}
 286
 287    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
 288        TokenType.APPLY,
 289        TokenType.FULL,
 290        TokenType.LEFT,
 291        TokenType.NATURAL,
 292        TokenType.OFFSET,
 293        TokenType.RIGHT,
 294        TokenType.WINDOW,
 295    }
 296
 297    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}
 298
 299    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}
 300
 301    TRIM_TYPES = {TokenType.LEADING, TokenType.TRAILING, TokenType.BOTH}
 302
 303    FUNC_TOKENS = {
 304        TokenType.COMMAND,
 305        TokenType.CURRENT_DATE,
 306        TokenType.CURRENT_DATETIME,
 307        TokenType.CURRENT_TIMESTAMP,
 308        TokenType.CURRENT_TIME,
 309        TokenType.CURRENT_USER,
 310        TokenType.FILTER,
 311        TokenType.FIRST,
 312        TokenType.FORMAT,
 313        TokenType.GLOB,
 314        TokenType.IDENTIFIER,
 315        TokenType.INDEX,
 316        TokenType.ISNULL,
 317        TokenType.ILIKE,
 318        TokenType.LIKE,
 319        TokenType.MERGE,
 320        TokenType.OFFSET,
 321        TokenType.PRIMARY_KEY,
 322        TokenType.RANGE,
 323        TokenType.REPLACE,
 324        TokenType.ROW,
 325        TokenType.UNNEST,
 326        TokenType.VAR,
 327        TokenType.LEFT,
 328        TokenType.RIGHT,
 329        TokenType.DATE,
 330        TokenType.DATETIME,
 331        TokenType.TABLE,
 332        TokenType.TIMESTAMP,
 333        TokenType.TIMESTAMPTZ,
 334        TokenType.WINDOW,
 335        *TYPE_TOKENS,
 336        *SUBQUERY_PREDICATES,
 337    }
 338
 339    CONJUNCTION = {
 340        TokenType.AND: exp.And,
 341        TokenType.OR: exp.Or,
 342    }
 343
 344    EQUALITY = {
 345        TokenType.EQ: exp.EQ,
 346        TokenType.NEQ: exp.NEQ,
 347        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
 348    }
 349
 350    COMPARISON = {
 351        TokenType.GT: exp.GT,
 352        TokenType.GTE: exp.GTE,
 353        TokenType.LT: exp.LT,
 354        TokenType.LTE: exp.LTE,
 355    }
 356
 357    BITWISE = {
 358        TokenType.AMP: exp.BitwiseAnd,
 359        TokenType.CARET: exp.BitwiseXor,
 360        TokenType.PIPE: exp.BitwiseOr,
 361        TokenType.DPIPE: exp.DPipe,
 362    }
 363
 364    TERM = {
 365        TokenType.DASH: exp.Sub,
 366        TokenType.PLUS: exp.Add,
 367        TokenType.MOD: exp.Mod,
 368        TokenType.COLLATE: exp.Collate,
 369    }
 370
 371    FACTOR = {
 372        TokenType.DIV: exp.IntDiv,
 373        TokenType.LR_ARROW: exp.Distance,
 374        TokenType.SLASH: exp.Div,
 375        TokenType.STAR: exp.Mul,
 376    }
 377
 378    TIMESTAMPS = {
 379        TokenType.TIME,
 380        TokenType.TIMESTAMP,
 381        TokenType.TIMESTAMPTZ,
 382        TokenType.TIMESTAMPLTZ,
 383    }
 384
 385    SET_OPERATIONS = {
 386        TokenType.UNION,
 387        TokenType.INTERSECT,
 388        TokenType.EXCEPT,
 389    }
 390
 391    JOIN_SIDES = {
 392        TokenType.LEFT,
 393        TokenType.RIGHT,
 394        TokenType.FULL,
 395    }
 396
 397    JOIN_KINDS = {
 398        TokenType.INNER,
 399        TokenType.OUTER,
 400        TokenType.CROSS,
 401        TokenType.SEMI,
 402        TokenType.ANTI,
 403    }
 404
 405    LAMBDAS = {
 406        TokenType.ARROW: lambda self, expressions: self.expression(
 407            exp.Lambda,
 408            this=self._replace_lambda(
 409                self._parse_conjunction(),
 410                {node.name for node in expressions},
 411            ),
 412            expressions=expressions,
 413        ),
 414        TokenType.FARROW: lambda self, expressions: self.expression(
 415            exp.Kwarg,
 416            this=exp.Var(this=expressions[0].name),
 417            expression=self._parse_conjunction(),
 418        ),
 419    }
 420
 421    COLUMN_OPERATORS = {
 422        TokenType.DOT: None,
 423        TokenType.DCOLON: lambda self, this, to: self.expression(
 424            exp.Cast if self.STRICT_CAST else exp.TryCast,
 425            this=this,
 426            to=to,
 427        ),
 428        TokenType.ARROW: lambda self, this, path: self.expression(
 429            exp.JSONExtract,
 430            this=this,
 431            expression=path,
 432        ),
 433        TokenType.DARROW: lambda self, this, path: self.expression(
 434            exp.JSONExtractScalar,
 435            this=this,
 436            expression=path,
 437        ),
 438        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 439            exp.JSONBExtract,
 440            this=this,
 441            expression=path,
 442        ),
 443        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 444            exp.JSONBExtractScalar,
 445            this=this,
 446            expression=path,
 447        ),
 448        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 449            exp.JSONBContains,
 450            this=this,
 451            expression=key,
 452        ),
 453    }
 454
 455    EXPRESSION_PARSERS = {
 456        exp.Column: lambda self: self._parse_column(),
 457        exp.DataType: lambda self: self._parse_types(),
 458        exp.From: lambda self: self._parse_from(),
 459        exp.Group: lambda self: self._parse_group(),
 460        exp.Identifier: lambda self: self._parse_id_var(),
 461        exp.Lateral: lambda self: self._parse_lateral(),
 462        exp.Join: lambda self: self._parse_join(),
 463        exp.Order: lambda self: self._parse_order(),
 464        exp.Cluster: lambda self: self._parse_sort(TokenType.CLUSTER_BY, exp.Cluster),
 465        exp.Sort: lambda self: self._parse_sort(TokenType.SORT_BY, exp.Sort),
 466        exp.Lambda: lambda self: self._parse_lambda(),
 467        exp.Limit: lambda self: self._parse_limit(),
 468        exp.Offset: lambda self: self._parse_offset(),
 469        exp.TableAlias: lambda self: self._parse_table_alias(),
 470        exp.Table: lambda self: self._parse_table(),
 471        exp.Condition: lambda self: self._parse_conjunction(),
 472        exp.Expression: lambda self: self._parse_statement(),
 473        exp.Properties: lambda self: self._parse_properties(),
 474        exp.Where: lambda self: self._parse_where(),
 475        exp.Ordered: lambda self: self._parse_ordered(),
 476        exp.Having: lambda self: self._parse_having(),
 477        exp.With: lambda self: self._parse_with(),
 478        exp.Window: lambda self: self._parse_named_window(),
 479        exp.Qualify: lambda self: self._parse_qualify(),
 480        exp.Returning: lambda self: self._parse_returning(),
 481        "JOIN_TYPE": lambda self: self._parse_join_side_and_kind(),
 482    }
 483
 484    STATEMENT_PARSERS = {
 485        TokenType.ALTER: lambda self: self._parse_alter(),
 486        TokenType.BEGIN: lambda self: self._parse_transaction(),
 487        TokenType.CACHE: lambda self: self._parse_cache(),
 488        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 489        TokenType.COMMENT: lambda self: self._parse_comment(),
 490        TokenType.CREATE: lambda self: self._parse_create(),
 491        TokenType.DELETE: lambda self: self._parse_delete(),
 492        TokenType.DESC: lambda self: self._parse_describe(),
 493        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 494        TokenType.DROP: lambda self: self._parse_drop(),
 495        TokenType.END: lambda self: self._parse_commit_or_rollback(),
 496        TokenType.INSERT: lambda self: self._parse_insert(),
 497        TokenType.LOAD_DATA: lambda self: self._parse_load_data(),
 498        TokenType.MERGE: lambda self: self._parse_merge(),
 499        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
 500        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 501        TokenType.SET: lambda self: self._parse_set(),
 502        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 503        TokenType.UPDATE: lambda self: self._parse_update(),
 504        TokenType.USE: lambda self: self.expression(
 505            exp.Use,
 506            kind=self._match_texts(("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA"))
 507            and exp.Var(this=self._prev.text),
 508            this=self._parse_table(schema=False),
 509        ),
 510    }
 511
 512    UNARY_PARSERS = {
 513        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 514        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
 515        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
 516        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
 517    }
 518
 519    PRIMARY_PARSERS = {
 520        TokenType.STRING: lambda self, token: self.expression(
 521            exp.Literal, this=token.text, is_string=True
 522        ),
 523        TokenType.NUMBER: lambda self, token: self.expression(
 524            exp.Literal, this=token.text, is_string=False
 525        ),
 526        TokenType.STAR: lambda self, _: self.expression(
 527            exp.Star,
 528            **{"except": self._parse_except(), "replace": self._parse_replace()},
 529        ),
 530        TokenType.NULL: lambda self, _: self.expression(exp.Null),
 531        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
 532        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
 533        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
 534        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
 535        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
 536        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 537        TokenType.NATIONAL: lambda self, token: self._parse_national(token),
 538        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 539    }
 540
 541    PLACEHOLDER_PARSERS = {
 542        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
 543        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 544        TokenType.COLON: lambda self: self.expression(exp.Placeholder, this=self._prev.text)
 545        if self._match_set((TokenType.NUMBER, TokenType.VAR))
 546        else None,
 547    }
 548
 549    RANGE_PARSERS = {
 550        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 551        TokenType.GLOB: binary_range_parser(exp.Glob),
 552        TokenType.ILIKE: binary_range_parser(exp.ILike),
 553        TokenType.IN: lambda self, this: self._parse_in(this),
 554        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 555        TokenType.IS: lambda self, this: self._parse_is(this),
 556        TokenType.LIKE: binary_range_parser(exp.Like),
 557        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 558        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 559        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 560    }
 561
 562    PROPERTY_PARSERS = {
 563        "AFTER": lambda self: self._parse_afterjournal(
 564            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 565        ),
 566        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 567        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 568        "BEFORE": lambda self: self._parse_journal(
 569            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 570        ),
 571        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 572        "CHARACTER SET": lambda self: self._parse_character_set(),
 573        "CHECKSUM": lambda self: self._parse_checksum(),
 574        "CLUSTER BY": lambda self: self.expression(
 575            exp.Cluster, expressions=self._parse_csv(self._parse_ordered)
 576        ),
 577        "COLLATE": lambda self: self._parse_property_assignment(exp.CollateProperty),
 578        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 579        "DATABLOCKSIZE": lambda self: self._parse_datablocksize(
 580            default=self._prev.text.upper() == "DEFAULT"
 581        ),
 582        "DEFINER": lambda self: self._parse_definer(),
 583        "DETERMINISTIC": lambda self: self.expression(
 584            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 585        ),
 586        "DISTKEY": lambda self: self._parse_distkey(),
 587        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 588        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 589        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 590        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
 591        "FALLBACK": lambda self: self._parse_fallback(no=self._prev.text.upper() == "NO"),
 592        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 593        "FREESPACE": lambda self: self._parse_freespace(),
 594        "GLOBAL": lambda self: self._parse_temporary(global_=True),
 595        "IMMUTABLE": lambda self: self.expression(
 596            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
 597        ),
 598        "JOURNAL": lambda self: self._parse_journal(
 599            no=self._prev.text.upper() == "NO", dual=self._prev.text.upper() == "DUAL"
 600        ),
 601        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 602        "LIKE": lambda self: self._parse_create_like(),
 603        "LOCAL": lambda self: self._parse_afterjournal(no=False, dual=False, local=True),
 604        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 605        "LOCK": lambda self: self._parse_locking(),
 606        "LOCKING": lambda self: self._parse_locking(),
 607        "LOG": lambda self: self._parse_log(no=self._prev.text.upper() == "NO"),
 608        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
 609        "MAX": lambda self: self._parse_datablocksize(),
 610        "MAXIMUM": lambda self: self._parse_datablocksize(),
 611        "MERGEBLOCKRATIO": lambda self: self._parse_mergeblockratio(
 612            no=self._prev.text.upper() == "NO", default=self._prev.text.upper() == "DEFAULT"
 613        ),
 614        "MIN": lambda self: self._parse_datablocksize(),
 615        "MINIMUM": lambda self: self._parse_datablocksize(),
 616        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
 617        "NO": lambda self: self._parse_noprimaryindex(),
 618        "NOT": lambda self: self._parse_afterjournal(no=False, dual=False, local=False),
 619        "ON": lambda self: self._parse_oncommit(),
 620        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 621        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 622        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 623        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 624        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 625        "RETURNS": lambda self: self._parse_returns(),
 626        "ROW": lambda self: self._parse_row(),
 627        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 628        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
 629        "SETTINGS": lambda self: self.expression(
 630            exp.SettingsProperty, expressions=self._parse_csv(self._parse_set_item)
 631        ),
 632        "SORTKEY": lambda self: self._parse_sortkey(),
 633        "STABLE": lambda self: self.expression(
 634            exp.StabilityProperty, this=exp.Literal.string("STABLE")
 635        ),
 636        "STORED": lambda self: self._parse_stored(),
 637        "TABLE_FORMAT": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 638        "TBLPROPERTIES": lambda self: self._parse_wrapped_csv(self._parse_property),
 639        "TEMP": lambda self: self._parse_temporary(global_=False),
 640        "TEMPORARY": lambda self: self._parse_temporary(global_=False),
 641        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
 642        "TTL": lambda self: self._parse_ttl(),
 643        "USING": lambda self: self._parse_property_assignment(exp.TableFormatProperty),
 644        "VOLATILE": lambda self: self._parse_volatile_property(),
 645        "WITH": lambda self: self._parse_with_property(),
 646    }
 647
 648    CONSTRAINT_PARSERS = {
 649        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 650        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 651        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
 652        "CHARACTER SET": lambda self: self.expression(
 653            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
 654        ),
 655        "CHECK": lambda self: self.expression(
 656            exp.CheckColumnConstraint, this=self._parse_wrapped(self._parse_conjunction)
 657        ),
 658        "COLLATE": lambda self: self.expression(
 659            exp.CollateColumnConstraint, this=self._parse_var()
 660        ),
 661        "COMMENT": lambda self: self.expression(
 662            exp.CommentColumnConstraint, this=self._parse_string()
 663        ),
 664        "COMPRESS": lambda self: self._parse_compress(),
 665        "DEFAULT": lambda self: self.expression(
 666            exp.DefaultColumnConstraint, this=self._parse_bitwise()
 667        ),
 668        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
 669        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 670        "FORMAT": lambda self: self.expression(
 671            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
 672        ),
 673        "GENERATED": lambda self: self._parse_generated_as_identity(),
 674        "IDENTITY": lambda self: self._parse_auto_increment(),
 675        "INLINE": lambda self: self._parse_inline(),
 676        "LIKE": lambda self: self._parse_create_like(),
 677        "NOT": lambda self: self._parse_not_constraint(),
 678        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
 679        "ON": lambda self: self._match(TokenType.UPDATE)
 680        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()),
 681        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
 682        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 683        "REFERENCES": lambda self: self._parse_references(match=False),
 684        "TITLE": lambda self: self.expression(
 685            exp.TitleColumnConstraint, this=self._parse_var_or_string()
 686        ),
 687        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
 688        "UNIQUE": lambda self: self._parse_unique(),
 689        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
 690    }
 691
 692    ALTER_PARSERS = {
 693        "ADD": lambda self: self._parse_alter_table_add(),
 694        "ALTER": lambda self: self._parse_alter_table_alter(),
 695        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
 696        "DROP": lambda self: self._parse_alter_table_drop(),
 697        "RENAME": lambda self: self._parse_alter_table_rename(),
 698    }
 699
 700    SCHEMA_UNNAMED_CONSTRAINTS = {"CHECK", "FOREIGN KEY", "LIKE", "PRIMARY KEY", "UNIQUE"}
 701
 702    NO_PAREN_FUNCTION_PARSERS = {
 703        TokenType.ANY: lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
 704        TokenType.CASE: lambda self: self._parse_case(),
 705        TokenType.IF: lambda self: self._parse_if(),
 706        TokenType.NEXT_VALUE_FOR: lambda self: self.expression(
 707            exp.NextValueFor,
 708            this=self._parse_column(),
 709            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 710        ),
 711    }
 712
 713    FUNCTION_PARSERS: t.Dict[str, t.Callable] = {
 714        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
 715        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
 716        "DECODE": lambda self: self._parse_decode(),
 717        "EXTRACT": lambda self: self._parse_extract(),
 718        "JSON_OBJECT": lambda self: self._parse_json_object(),
 719        "LOG": lambda self: self._parse_logarithm(),
 720        "MATCH": lambda self: self._parse_match_against(),
 721        "OPENJSON": lambda self: self._parse_open_json(),
 722        "POSITION": lambda self: self._parse_position(),
 723        "STRING_AGG": lambda self: self._parse_string_agg(),
 724        "SUBSTRING": lambda self: self._parse_substring(),
 725        "TRIM": lambda self: self._parse_trim(),
 726        "TRY_CAST": lambda self: self._parse_cast(False),
 727        "TRY_CONVERT": lambda self: self._parse_convert(False),
 728    }
 729
 730    QUERY_MODIFIER_PARSERS = {
 731        "joins": lambda self: list(iter(self._parse_join, None)),
 732        "laterals": lambda self: list(iter(self._parse_lateral, None)),
 733        "match": lambda self: self._parse_match_recognize(),
 734        "where": lambda self: self._parse_where(),
 735        "group": lambda self: self._parse_group(),
 736        "having": lambda self: self._parse_having(),
 737        "qualify": lambda self: self._parse_qualify(),
 738        "windows": lambda self: self._parse_window_clause(),
 739        "order": lambda self: self._parse_order(),
 740        "limit": lambda self: self._parse_limit(),
 741        "offset": lambda self: self._parse_offset(),
 742        "lock": lambda self: self._parse_lock(),
 743        "sample": lambda self: self._parse_table_sample(as_modifier=True),
 744    }
 745
 746    SET_PARSERS = {
 747        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
 748        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
 749        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
 750        "TRANSACTION": lambda self: self._parse_set_transaction(),
 751    }
 752
 753    SHOW_PARSERS: t.Dict[str, t.Callable] = {}
 754
 755    TYPE_LITERAL_PARSERS: t.Dict[exp.DataType.Type, t.Callable] = {}
 756
 757    MODIFIABLES = (exp.Subquery, exp.Subqueryable, exp.Table)
 758
 759    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 760
 761    TRANSACTION_CHARACTERISTICS = {
 762        "ISOLATION LEVEL REPEATABLE READ",
 763        "ISOLATION LEVEL READ COMMITTED",
 764        "ISOLATION LEVEL READ UNCOMMITTED",
 765        "ISOLATION LEVEL SERIALIZABLE",
 766        "READ WRITE",
 767        "READ ONLY",
 768    }
 769
 770    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}
 771
 772    CLONE_KINDS = {"TIMESTAMP", "OFFSET", "STATEMENT"}
 773
 774    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
 775    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
 776
 777    ADD_CONSTRAINT_TOKENS = {TokenType.CONSTRAINT, TokenType.PRIMARY_KEY, TokenType.FOREIGN_KEY}
 778
 779    STRICT_CAST = True
 780
 781    CONVERT_TYPE_FIRST = False
 782
 783    PREFIXED_PIVOT_COLUMNS = False
 784    IDENTIFY_PIVOT_STRINGS = False
 785
 786    LOG_BASE_FIRST = True
 787    LOG_DEFAULTS_TO_LN = False
 788
 789    __slots__ = (
 790        "error_level",
 791        "error_message_context",
 792        "sql",
 793        "errors",
 794        "index_offset",
 795        "unnest_column_only",
 796        "alias_post_tablesample",
 797        "max_errors",
 798        "null_ordering",
 799        "_tokens",
 800        "_index",
 801        "_curr",
 802        "_next",
 803        "_prev",
 804        "_prev_comments",
 805        "_show_trie",
 806        "_set_trie",
 807    )
 808
 809    def __init__(
 810        self,
 811        error_level: t.Optional[ErrorLevel] = None,
 812        error_message_context: int = 100,
 813        index_offset: int = 0,
 814        unnest_column_only: bool = False,
 815        alias_post_tablesample: bool = False,
 816        max_errors: int = 3,
 817        null_ordering: t.Optional[str] = None,
 818    ):
 819        self.error_level = error_level or ErrorLevel.IMMEDIATE
 820        self.error_message_context = error_message_context
 821        self.index_offset = index_offset
 822        self.unnest_column_only = unnest_column_only
 823        self.alias_post_tablesample = alias_post_tablesample
 824        self.max_errors = max_errors
 825        self.null_ordering = null_ordering
 826        self.reset()
 827
 828    def reset(self):
 829        self.sql = ""
 830        self.errors = []
 831        self._tokens = []
 832        self._index = 0
 833        self._curr = None
 834        self._next = None
 835        self._prev = None
 836        self._prev_comments = None
 837
 838    def parse(
 839        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
 840    ) -> t.List[t.Optional[exp.Expression]]:
 841        """
 842        Parses a list of tokens and returns a list of syntax trees, one tree
 843        per parsed SQL statement.
 844
 845        Args:
 846            raw_tokens: the list of tokens.
 847            sql: the original SQL string, used to produce helpful debug messages.
 848
 849        Returns:
 850            The list of syntax trees.
 851        """
 852        return self._parse(
 853            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 854        )
 855
 856    def parse_into(
 857        self,
 858        expression_types: exp.IntoType,
 859        raw_tokens: t.List[Token],
 860        sql: t.Optional[str] = None,
 861    ) -> t.List[t.Optional[exp.Expression]]:
 862        """
 863        Parses a list of tokens into a given Expression type. If a collection of Expression
 864        types is given instead, this method will try to parse the token list into each one
 865        of them, stopping at the first for which the parsing succeeds.
 866
 867        Args:
 868            expression_types: the expression type(s) to try and parse the token list into.
 869            raw_tokens: the list of tokens.
 870            sql: the original SQL string, used to produce helpful debug messages.
 871
 872        Returns:
 873            The target Expression.
 874        """
 875        errors = []
 876        for expression_type in ensure_collection(expression_types):
 877            parser = self.EXPRESSION_PARSERS.get(expression_type)
 878            if not parser:
 879                raise TypeError(f"No parser registered for {expression_type}")
 880            try:
 881                return self._parse(parser, raw_tokens, sql)
 882            except ParseError as e:
 883                e.errors[0]["into_expression"] = expression_type
 884                errors.append(e)
 885        raise ParseError(
 886            f"Failed to parse into {expression_types}",
 887            errors=merge_errors(errors),
 888        ) from errors[-1]
 889
 890    def _parse(
 891        self,
 892        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
 893        raw_tokens: t.List[Token],
 894        sql: t.Optional[str] = None,
 895    ) -> t.List[t.Optional[exp.Expression]]:
 896        self.reset()
 897        self.sql = sql or ""
 898        total = len(raw_tokens)
 899        chunks: t.List[t.List[Token]] = [[]]
 900
 901        for i, token in enumerate(raw_tokens):
 902            if token.token_type == TokenType.SEMICOLON:
 903                if i < total - 1:
 904                    chunks.append([])
 905            else:
 906                chunks[-1].append(token)
 907
 908        expressions = []
 909
 910        for tokens in chunks:
 911            self._index = -1
 912            self._tokens = tokens
 913            self._advance()
 914
 915            expressions.append(parse_method(self))
 916
 917            if self._index < len(self._tokens):
 918                self.raise_error("Invalid expression / Unexpected token")
 919
 920            self.check_errors()
 921
 922        return expressions
 923
 924    def check_errors(self) -> None:
 925        """
 926        Logs or raises any found errors, depending on the chosen error level setting.
 927        """
 928        if self.error_level == ErrorLevel.WARN:
 929            for error in self.errors:
 930                logger.error(str(error))
 931        elif self.error_level == ErrorLevel.RAISE and self.errors:
 932            raise ParseError(
 933                concat_messages(self.errors, self.max_errors),
 934                errors=merge_errors(self.errors),
 935            )
 936
 937    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
 938        """
 939        Appends an error in the list of recorded errors or raises it, depending on the chosen
 940        error level setting.
 941        """
 942        token = token or self._curr or self._prev or Token.string("")
 943        start = token.start
 944        end = token.end
 945        start_context = self.sql[max(start - self.error_message_context, 0) : start]
 946        highlight = self.sql[start:end]
 947        end_context = self.sql[end : end + self.error_message_context]
 948
 949        error = ParseError.new(
 950            f"{message}. Line {token.line}, Col: {token.col}.\n"
 951            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
 952            description=message,
 953            line=token.line,
 954            col=token.col,
 955            start_context=start_context,
 956            highlight=highlight,
 957            end_context=end_context,
 958        )
 959
 960        if self.error_level == ErrorLevel.IMMEDIATE:
 961            raise error
 962
 963        self.errors.append(error)
 964
 965    def expression(
 966        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
 967    ) -> E:
 968        """
 969        Creates a new, validated Expression.
 970
 971        Args:
 972            exp_class: the expression class to instantiate.
 973            comments: an optional list of comments to attach to the expression.
 974            kwargs: the arguments to set for the expression along with their respective values.
 975
 976        Returns:
 977            The target expression.
 978        """
 979        instance = exp_class(**kwargs)
 980        instance.add_comments(comments) if comments else self._add_comments(instance)
 981        self.validate_expression(instance)
 982        return instance
 983
 984    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
 985        if expression and self._prev_comments:
 986            expression.add_comments(self._prev_comments)
 987            self._prev_comments = None
 988
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)
1005
1006    def _find_sql(self, start: Token, end: Token) -> str:
1007        return self.sql[start.start : end.end]
1008
1009    def _advance(self, times: int = 1) -> None:
1010        self._index += times
1011        self._curr = seq_get(self._tokens, self._index)
1012        self._next = seq_get(self._tokens, self._index + 1)
1013        if self._index > 0:
1014            self._prev = self._tokens[self._index - 1]
1015            self._prev_comments = self._prev.comments
1016        else:
1017            self._prev = None
1018            self._prev_comments = None
1019
1020    def _retreat(self, index: int) -> None:
1021        if index != self._index:
1022            self._advance(index - self._index)
1023
1024    def _parse_command(self) -> exp.Command:
1025        return self.expression(exp.Command, this=self._prev.text, expression=self._parse_string())
1026
1027    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1028        start = self._prev
1029        exists = self._parse_exists() if allow_exists else None
1030
1031        self._match(TokenType.ON)
1032
1033        kind = self._match_set(self.CREATABLES) and self._prev
1034
1035        if not kind:
1036            return self._parse_as_command(start)
1037
1038        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1039            this = self._parse_user_defined_function(kind=kind.token_type)
1040        elif kind.token_type == TokenType.TABLE:
1041            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1042        elif kind.token_type == TokenType.COLUMN:
1043            this = self._parse_column()
1044        else:
1045            this = self._parse_id_var()
1046
1047        self._match(TokenType.IS)
1048
1049        return self.expression(
1050            exp.Comment, this=this, kind=kind.text, expression=self._parse_string(), exists=exists
1051        )
1052
1053    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1054    def _parse_ttl(self) -> exp.Expression:
1055        def _parse_ttl_action() -> t.Optional[exp.Expression]:
1056            this = self._parse_bitwise()
1057
1058            if self._match_text_seq("DELETE"):
1059                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1060            if self._match_text_seq("RECOMPRESS"):
1061                return self.expression(
1062                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1063                )
1064            if self._match_text_seq("TO", "DISK"):
1065                return self.expression(
1066                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1067                )
1068            if self._match_text_seq("TO", "VOLUME"):
1069                return self.expression(
1070                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1071                )
1072
1073            return this
1074
1075        expressions = self._parse_csv(_parse_ttl_action)
1076        where = self._parse_where()
1077        group = self._parse_group()
1078
1079        aggregates = None
1080        if group and self._match(TokenType.SET):
1081            aggregates = self._parse_csv(self._parse_set_item)
1082
1083        return self.expression(
1084            exp.MergeTreeTTL,
1085            expressions=expressions,
1086            where=where,
1087            group=group,
1088            aggregates=aggregates,
1089        )
1090
1091    def _parse_statement(self) -> t.Optional[exp.Expression]:
1092        if self._curr is None:
1093            return None
1094
1095        if self._match_set(self.STATEMENT_PARSERS):
1096            return self.STATEMENT_PARSERS[self._prev.token_type](self)
1097
1098        if self._match_set(Tokenizer.COMMANDS):
1099            return self._parse_command()
1100
1101        expression = self._parse_expression()
1102        expression = self._parse_set_operations(expression) if expression else self._parse_select()
1103        return self._parse_query_modifiers(expression)
1104
1105    def _parse_drop(self) -> t.Optional[exp.Drop | exp.Command]:
1106        start = self._prev
1107        temporary = self._match(TokenType.TEMPORARY)
1108        materialized = self._match(TokenType.MATERIALIZED)
1109        kind = self._match_set(self.CREATABLES) and self._prev.text
1110        if not kind:
1111            return self._parse_as_command(start)
1112
1113        return self.expression(
1114            exp.Drop,
1115            exists=self._parse_exists(),
1116            this=self._parse_table(schema=True),
1117            kind=kind,
1118            temporary=temporary,
1119            materialized=materialized,
1120            cascade=self._match(TokenType.CASCADE),
1121            constraints=self._match_text_seq("CONSTRAINTS"),
1122            purge=self._match_text_seq("PURGE"),
1123        )
1124
    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Consumes "IF EXISTS" (or "IF NOT EXISTS" when not_ is True) if present.
        # NOTE: the short-circuit chain means a partial match yields whatever
        # falsy value the failing _match returned, not necessarily False —
        # callers should test the result for truthiness only.
        return (
            self._match(TokenType.IF)
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )
1131
1132    def _parse_create(self) -> t.Optional[exp.Expression]:
1133        start = self._prev
1134        replace = self._prev.text.upper() == "REPLACE" or self._match_pair(
1135            TokenType.OR, TokenType.REPLACE
1136        )
1137        unique = self._match(TokenType.UNIQUE)
1138
1139        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
1140            self._match(TokenType.TABLE)
1141
1142        properties = None
1143        create_token = self._match_set(self.CREATABLES) and self._prev
1144
1145        if not create_token:
1146            properties = self._parse_properties()  # exp.Properties.Location.POST_CREATE
1147            create_token = self._match_set(self.CREATABLES) and self._prev
1148
1149            if not properties or not create_token:
1150                return self._parse_as_command(start)
1151
1152        exists = self._parse_exists(not_=True)
1153        this = None
1154        expression = None
1155        indexes = None
1156        no_schema_binding = None
1157        begin = None
1158        clone = None
1159
1160        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1161            this = self._parse_user_defined_function(kind=create_token.token_type)
1162            temp_properties = self._parse_properties()
1163            if properties and temp_properties:
1164                properties.expressions.extend(temp_properties.expressions)
1165            elif temp_properties:
1166                properties = temp_properties
1167
1168            self._match(TokenType.ALIAS)
1169            begin = self._match(TokenType.BEGIN)
1170            return_ = self._match_text_seq("RETURN")
1171            expression = self._parse_statement()
1172
1173            if return_:
1174                expression = self.expression(exp.Return, this=expression)
1175        elif create_token.token_type == TokenType.INDEX:
1176            this = self._parse_index()
1177        elif create_token.token_type in self.DB_CREATABLES:
1178            table_parts = self._parse_table_parts(schema=True)
1179
1180            # exp.Properties.Location.POST_NAME
1181            if self._match(TokenType.COMMA):
1182                temp_properties = self._parse_properties(before=True)
1183                if properties and temp_properties:
1184                    properties.expressions.extend(temp_properties.expressions)
1185                elif temp_properties:
1186                    properties = temp_properties
1187
1188            this = self._parse_schema(this=table_parts)
1189
1190            # exp.Properties.Location.POST_SCHEMA and POST_WITH
1191            temp_properties = self._parse_properties()
1192            if properties and temp_properties:
1193                properties.expressions.extend(temp_properties.expressions)
1194            elif temp_properties:
1195                properties = temp_properties
1196
1197            self._match(TokenType.ALIAS)
1198
1199            # exp.Properties.Location.POST_ALIAS
1200            if not (
1201                self._match(TokenType.SELECT, advance=False)
1202                or self._match(TokenType.WITH, advance=False)
1203                or self._match(TokenType.L_PAREN, advance=False)
1204            ):
1205                temp_properties = self._parse_properties()
1206                if properties and temp_properties:
1207                    properties.expressions.extend(temp_properties.expressions)
1208                elif temp_properties:
1209                    properties = temp_properties
1210
1211            expression = self._parse_ddl_select()
1212
1213            if create_token.token_type == TokenType.TABLE:
1214                # exp.Properties.Location.POST_EXPRESSION
1215                temp_properties = self._parse_properties()
1216                if properties and temp_properties:
1217                    properties.expressions.extend(temp_properties.expressions)
1218                elif temp_properties:
1219                    properties = temp_properties
1220
1221                indexes = []
1222                while True:
1223                    index = self._parse_create_table_index()
1224
1225                    # exp.Properties.Location.POST_INDEX
1226                    if self._match(TokenType.PARTITION_BY, advance=False):
1227                        temp_properties = self._parse_properties()
1228                        if properties and temp_properties:
1229                            properties.expressions.extend(temp_properties.expressions)
1230                        elif temp_properties:
1231                            properties = temp_properties
1232
1233                    if not index:
1234                        break
1235                    else:
1236                        indexes.append(index)
1237            elif create_token.token_type == TokenType.VIEW:
1238                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
1239                    no_schema_binding = True
1240
1241            if self._match_text_seq("CLONE"):
1242                clone = self._parse_table(schema=True)
1243                when = self._match_texts({"AT", "BEFORE"}) and self._prev.text.upper()
1244                clone_kind = (
1245                    self._match(TokenType.L_PAREN)
1246                    and self._match_texts(self.CLONE_KINDS)
1247                    and self._prev.text.upper()
1248                )
1249                clone_expression = self._match(TokenType.FARROW) and self._parse_bitwise()
1250                self._match(TokenType.R_PAREN)
1251                clone = self.expression(
1252                    exp.Clone, this=clone, when=when, kind=clone_kind, expression=clone_expression
1253                )
1254
1255        return self.expression(
1256            exp.Create,
1257            this=this,
1258            kind=create_token.text,
1259            replace=replace,
1260            unique=unique,
1261            expression=expression,
1262            exists=exists,
1263            properties=properties,
1264            indexes=indexes,
1265            no_schema_binding=no_schema_binding,
1266            begin=begin,
1267            clone=clone,
1268        )
1269
1270    def _parse_property_before(self) -> t.Optional[exp.Expression]:
1271        self._match(TokenType.COMMA)
1272
1273        # parsers look to _prev for no/dual/default, so need to consume first
1274        self._match_text_seq("NO")
1275        self._match_text_seq("DUAL")
1276        self._match_text_seq("DEFAULT")
1277
1278        if self.PROPERTY_PARSERS.get(self._curr.text.upper()):
1279            return self.PROPERTY_PARSERS[self._curr.text.upper()](self)
1280
1281        return None
1282
1283    def _parse_property(self) -> t.Optional[exp.Expression]:
1284        if self._match_texts(self.PROPERTY_PARSERS):
1285            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
1286
1287        if self._match_pair(TokenType.DEFAULT, TokenType.CHARACTER_SET):
1288            return self._parse_character_set(default=True)
1289
1290        if self._match_pair(TokenType.COMPOUND, TokenType.SORTKEY):
1291            return self._parse_sortkey(compound=True)
1292
1293        if self._match_text_seq("SQL", "SECURITY"):
1294            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))
1295
1296        assignment = self._match_pair(
1297            TokenType.VAR, TokenType.EQ, advance=False
1298        ) or self._match_pair(TokenType.STRING, TokenType.EQ, advance=False)
1299
1300        if assignment:
1301            key = self._parse_var_or_string()
1302            self._match(TokenType.EQ)
1303            return self.expression(exp.Property, this=key, value=self._parse_column())
1304
1305        return None
1306
1307    def _parse_stored(self) -> exp.Expression:
1308        self._match(TokenType.ALIAS)
1309
1310        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
1311        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
1312
1313        return self.expression(
1314            exp.FileFormatProperty,
1315            this=self.expression(
1316                exp.InputOutputFormat, input_format=input_format, output_format=output_format
1317            )
1318            if input_format or output_format
1319            else self._parse_var_or_string() or self._parse_number() or self._parse_id_var(),
1320        )
1321
1322    def _parse_property_assignment(self, exp_class: t.Type[exp.Expression]) -> exp.Expression:
1323        self._match(TokenType.EQ)
1324        self._match(TokenType.ALIAS)
1325        return self.expression(exp_class, this=self._parse_field())
1326
1327    def _parse_properties(self, before=None) -> t.Optional[exp.Expression]:
1328        properties = []
1329
1330        while True:
1331            if before:
1332                identified_property = self._parse_property_before()
1333            else:
1334                identified_property = self._parse_property()
1335
1336            if not identified_property:
1337                break
1338            for p in ensure_list(identified_property):
1339                properties.append(p)
1340
1341        if properties:
1342            return self.expression(exp.Properties, expressions=properties)
1343
1344        return None
1345
    def _parse_fallback(self, no=False) -> exp.Expression:
        # [NO] FALLBACK [PROTECTION]; `no` is passed in by the caller, which
        # consumed any leading NO keyword before dispatching here.
        self._match_text_seq("FALLBACK")
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )
1351
1352    def _parse_volatile_property(self) -> exp.Expression:
1353        if self._index >= 2:
1354            pre_volatile_token = self._tokens[self._index - 2]
1355        else:
1356            pre_volatile_token = None
1357
1358        if pre_volatile_token and pre_volatile_token.token_type in (
1359            TokenType.CREATE,
1360            TokenType.REPLACE,
1361            TokenType.UNIQUE,
1362        ):
1363            return exp.VolatileProperty()
1364
1365        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))
1366
1367    def _parse_with_property(
1368        self,
1369    ) -> t.Union[t.Optional[exp.Expression], t.List[t.Optional[exp.Expression]]]:
1370        self._match(TokenType.WITH)
1371        if self._match(TokenType.L_PAREN, advance=False):
1372            return self._parse_wrapped_csv(self._parse_property)
1373
1374        if self._match_text_seq("JOURNAL"):
1375            return self._parse_withjournaltable()
1376
1377        if self._match_text_seq("DATA"):
1378            return self._parse_withdata(no=False)
1379        elif self._match_text_seq("NO", "DATA"):
1380            return self._parse_withdata(no=True)
1381
1382        if not self._next:
1383            return None
1384
1385        return self._parse_withisolatedloading()
1386
1387    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
1388    def _parse_definer(self) -> t.Optional[exp.Expression]:
1389        self._match(TokenType.EQ)
1390
1391        user = self._parse_id_var()
1392        self._match(TokenType.PARAMETER)
1393        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
1394
1395        if not user or not host:
1396            return None
1397
1398        return exp.DefinerProperty(this=f"{user}@{host}")
1399
    def _parse_withjournaltable(self) -> exp.Expression:
        # WITH JOURNAL TABLE = <table>; the leading WITH JOURNAL was consumed
        # by _parse_with_property before dispatching here.
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())
1404
    def _parse_log(self, no=False) -> exp.Expression:
        # [NO] LOG property; the caller consumed any leading NO and passes it in.
        self._match_text_seq("LOG")
        return self.expression(exp.LogProperty, no=no)
1408
    def _parse_journal(self, no=False, dual=False) -> exp.Expression:
        # [NO | DUAL] [BEFORE] JOURNAL; no/dual flags are passed in by the caller.
        before = self._match_text_seq("BEFORE")
        self._match_text_seq("JOURNAL")
        return self.expression(exp.JournalProperty, no=no, dual=dual, before=before)
1413
    def _parse_afterjournal(self, no=False, dual=False, local=None) -> exp.Expression:
        # [NOT] [LOCAL] AFTER JOURNAL; no/dual/local flags are passed in by the caller.
        self._match_text_seq("NOT")
        self._match_text_seq("LOCAL")
        self._match_text_seq("AFTER", "JOURNAL")
        return self.expression(exp.AfterJournalProperty, no=no, dual=dual, local=local)
1419
1420    def _parse_checksum(self) -> exp.Expression:
1421        self._match_text_seq("CHECKSUM")
1422        self._match(TokenType.EQ)
1423
1424        on = None
1425        if self._match(TokenType.ON):
1426            on = True
1427        elif self._match_text_seq("OFF"):
1428            on = False
1429        default = self._match(TokenType.DEFAULT)
1430
1431        return self.expression(
1432            exp.ChecksumProperty,
1433            on=on,
1434            default=default,
1435        )
1436
1437    def _parse_freespace(self) -> exp.Expression:
1438        self._match_text_seq("FREESPACE")
1439        self._match(TokenType.EQ)
1440        return self.expression(
1441            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
1442        )
1443
1444    def _parse_mergeblockratio(self, no=False, default=False) -> exp.Expression:
1445        self._match_text_seq("MERGEBLOCKRATIO")
1446        if self._match(TokenType.EQ):
1447            return self.expression(
1448                exp.MergeBlockRatioProperty,
1449                this=self._parse_number(),
1450                percent=self._match(TokenType.PERCENT),
1451            )
1452        else:
1453            return self.expression(
1454                exp.MergeBlockRatioProperty,
1455                no=no,
1456                default=default,
1457            )
1458
1459    def _parse_datablocksize(self, default=None) -> exp.Expression:
1460        if default:
1461            self._match_text_seq("DATABLOCKSIZE")
1462            return self.expression(exp.DataBlocksizeProperty, default=True)
1463        elif self._match_texts(("MIN", "MINIMUM")):
1464            self._match_text_seq("DATABLOCKSIZE")
1465            return self.expression(exp.DataBlocksizeProperty, min=True)
1466        elif self._match_texts(("MAX", "MAXIMUM")):
1467            self._match_text_seq("DATABLOCKSIZE")
1468            return self.expression(exp.DataBlocksizeProperty, min=False)
1469
1470        self._match_text_seq("DATABLOCKSIZE")
1471        self._match(TokenType.EQ)
1472        size = self._parse_number()
1473        units = None
1474        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
1475            units = self._prev.text
1476        return self.expression(exp.DataBlocksizeProperty, size=size, units=units)
1477
1478    def _parse_blockcompression(self) -> exp.Expression:
1479        self._match_text_seq("BLOCKCOMPRESSION")
1480        self._match(TokenType.EQ)
1481        always = self._match_text_seq("ALWAYS")
1482        manual = self._match_text_seq("MANUAL")
1483        never = self._match_text_seq("NEVER")
1484        default = self._match_text_seq("DEFAULT")
1485        autotemp = None
1486        if self._match_text_seq("AUTOTEMP"):
1487            autotemp = self._parse_schema()
1488
1489        return self.expression(
1490            exp.BlockCompressionProperty,
1491            always=always,
1492            manual=manual,
1493            never=never,
1494            default=default,
1495            autotemp=autotemp,
1496        )
1497
1498    def _parse_withisolatedloading(self) -> exp.Expression:
1499        no = self._match_text_seq("NO")
1500        concurrent = self._match_text_seq("CONCURRENT")
1501        self._match_text_seq("ISOLATED", "LOADING")
1502        for_all = self._match_text_seq("FOR", "ALL")
1503        for_insert = self._match_text_seq("FOR", "INSERT")
1504        for_none = self._match_text_seq("FOR", "NONE")
1505        return self.expression(
1506            exp.IsolatedLoadingProperty,
1507            no=no,
1508            concurrent=concurrent,
1509            for_all=for_all,
1510            for_insert=for_insert,
1511            for_none=for_none,
1512        )
1513
1514    def _parse_locking(self) -> exp.Expression:
1515        if self._match(TokenType.TABLE):
1516            kind = "TABLE"
1517        elif self._match(TokenType.VIEW):
1518            kind = "VIEW"
1519        elif self._match(TokenType.ROW):
1520            kind = "ROW"
1521        elif self._match_text_seq("DATABASE"):
1522            kind = "DATABASE"
1523        else:
1524            kind = None
1525
1526        if kind in ("DATABASE", "TABLE", "VIEW"):
1527            this = self._parse_table_parts()
1528        else:
1529            this = None
1530
1531        if self._match(TokenType.FOR):
1532            for_or_in = "FOR"
1533        elif self._match(TokenType.IN):
1534            for_or_in = "IN"
1535        else:
1536            for_or_in = None
1537
1538        if self._match_text_seq("ACCESS"):
1539            lock_type = "ACCESS"
1540        elif self._match_texts(("EXCL", "EXCLUSIVE")):
1541            lock_type = "EXCLUSIVE"
1542        elif self._match_text_seq("SHARE"):
1543            lock_type = "SHARE"
1544        elif self._match_text_seq("READ"):
1545            lock_type = "READ"
1546        elif self._match_text_seq("WRITE"):
1547            lock_type = "WRITE"
1548        elif self._match_text_seq("CHECKSUM"):
1549            lock_type = "CHECKSUM"
1550        else:
1551            lock_type = None
1552
1553        override = self._match_text_seq("OVERRIDE")
1554
1555        return self.expression(
1556            exp.LockingProperty,
1557            this=this,
1558            kind=kind,
1559            for_or_in=for_or_in,
1560            lock_type=lock_type,
1561            override=override,
1562        )
1563
1564    def _parse_partition_by(self) -> t.List[t.Optional[exp.Expression]]:
1565        if self._match(TokenType.PARTITION_BY):
1566            return self._parse_csv(self._parse_conjunction)
1567        return []
1568
1569    def _parse_partitioned_by(self) -> exp.Expression:
1570        self._match(TokenType.EQ)
1571        return self.expression(
1572            exp.PartitionedByProperty,
1573            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
1574        )
1575
1576    def _parse_withdata(self, no=False) -> exp.Expression:
1577        if self._match_text_seq("AND", "STATISTICS"):
1578            statistics = True
1579        elif self._match_text_seq("AND", "NO", "STATISTICS"):
1580            statistics = False
1581        else:
1582            statistics = None
1583
1584        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)
1585
    def _parse_noprimaryindex(self) -> exp.Expression:
        # Parses the PRIMARY INDEX tail of "NO PRIMARY INDEX" (the NO keyword
        # was presumably consumed by the dispatching caller — confirm upstream).
        self._match_text_seq("PRIMARY", "INDEX")
        return exp.NoPrimaryIndexProperty()
1589
    def _parse_oncommit(self) -> exp.Expression:
        # Parses the COMMIT PRESERVE ROWS tail of "ON COMMIT PRESERVE ROWS"
        # (the ON keyword was presumably consumed by the dispatching caller).
        self._match_text_seq("COMMIT", "PRESERVE", "ROWS")
        return exp.OnCommitProperty()
1593
    def _parse_distkey(self) -> exp.Expression:
        # DISTKEY followed by a single parenthesized identifier.
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))
1596
1597    def _parse_create_like(self) -> t.Optional[exp.Expression]:
1598        table = self._parse_table(schema=True)
1599        options = []
1600        while self._match_texts(("INCLUDING", "EXCLUDING")):
1601            this = self._prev.text.upper()
1602            id_var = self._parse_id_var()
1603
1604            if not id_var:
1605                return None
1606
1607            options.append(
1608                self.expression(
1609                    exp.Property,
1610                    this=this,
1611                    value=exp.Var(this=id_var.this.upper()),
1612                )
1613            )
1614        return self.expression(exp.LikeProperty, this=table, expressions=options)
1615
1616    def _parse_sortkey(self, compound: bool = False) -> exp.Expression:
1617        return self.expression(
1618            exp.SortKeyProperty, this=self._parse_wrapped_csv(self._parse_id_var), compound=compound
1619        )
1620
1621    def _parse_character_set(self, default: bool = False) -> exp.Expression:
1622        self._match(TokenType.EQ)
1623        return self.expression(
1624            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
1625        )
1626
1627    def _parse_returns(self) -> exp.Expression:
1628        value: t.Optional[exp.Expression]
1629        is_table = self._match(TokenType.TABLE)
1630
1631        if is_table:
1632            if self._match(TokenType.LT):
1633                value = self.expression(
1634                    exp.Schema,
1635                    this="TABLE",
1636                    expressions=self._parse_csv(self._parse_struct_types),
1637                )
1638                if not self._match(TokenType.GT):
1639                    self.raise_error("Expecting >")
1640            else:
1641                value = self._parse_schema(exp.Var(this="TABLE"))
1642        else:
1643            value = self._parse_types()
1644
1645        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table)
1646
1647    def _parse_temporary(self, global_=False) -> exp.Expression:
1648        self._match(TokenType.TEMPORARY)  # in case calling from "GLOBAL"
1649        return self.expression(exp.TemporaryProperty, global_=global_)
1650
1651    def _parse_describe(self) -> exp.Expression:
1652        kind = self._match_set(self.CREATABLES) and self._prev.text
1653        this = self._parse_table()
1654
1655        return self.expression(exp.Describe, this=this, kind=kind)
1656
    def _parse_insert(self) -> exp.Expression:
        """Parse an INSERT statement.

        Handles INSERT [OVERWRITE] [LOCAL] followed by either Hive's
        DIRECTORY form or the usual [OR <alternative>] [INTO] [TABLE] <table>
        form, then the optional IF EXISTS / partition spec / source query /
        ON CONFLICT / RETURNING tail. Token consumption order matters here.
        """
        overwrite = self._match(TokenType.OVERWRITE)
        local = self._match(TokenType.LOCAL)
        alternative = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT OVERWRITE [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match(TokenType.OR):
                # e.g. sqlite: INSERT OR REPLACE/IGNORE/... INTO ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            self._match(TokenType.TABLE)
            this = self._parse_table(schema=True)

        return self.expression(
            exp.Insert,
            this=this,
            exists=self._parse_exists(),
            partition=self._parse_partition(),
            expression=self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
        )
1688
    def _parse_on_conflict(self) -> t.Optional[exp.Expression]:
        """Parse an ON CONFLICT (postgres/sqlite) or ON DUPLICATE KEY (mysql) clause.

        Returns None when neither introducing phrase is present.
        """
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not (conflict or duplicate):
            return None

        nothing = None
        expressions = None
        key = None
        constraint = None

        if conflict:
            # Conflict target: a named constraint or a list of key columns.
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            else:
                key = self._parse_csv(self._parse_value)

        self._match_text_seq("DO")
        if self._match_text_seq("NOTHING"):
            nothing = True
        else:
            # DO UPDATE SET col = expr, ...
            self._match(TokenType.UPDATE)
            expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            nothing=nothing,
            key=key,
            constraint=constraint,
        )
1722
1723    def _parse_returning(self) -> t.Optional[exp.Expression]:
1724        if not self._match(TokenType.RETURNING):
1725            return None
1726
1727        return self.expression(exp.Returning, expressions=self._parse_csv(self._parse_column))
1728
1729    def _parse_row(self) -> t.Optional[exp.Expression]:
1730        if not self._match(TokenType.FORMAT):
1731            return None
1732        return self._parse_row_format()
1733
    def _parse_row_format(self, match_row: bool = False) -> t.Optional[exp.Expression]:
        """Parse a Hive ROW FORMAT clause (SERDE or DELIMITED).

        Args:
            match_row: when True, require a leading "ROW FORMAT" token pair and
                return None if it is absent.
        """
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            return self.expression(exp.RowFormatSerdeProperty, this=self._parse_string())

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each DELIMITED sub-clause is optional; they must appear in this order.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore
1759
    def _parse_load_data(self) -> exp.Expression:
        """Parse Hive's LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...

        Keyword arguments below are evaluated in order, which mirrors the SQL
        clause order (table, partition spec, INPUTFORMAT, SERDE).
        """
        local = self._match(TokenType.LOCAL)
        self._match_text_seq("INPATH")
        inpath = self._parse_string()
        overwrite = self._match(TokenType.OVERWRITE)
        self._match_pair(TokenType.INTO, TokenType.TABLE)

        return self.expression(
            exp.LoadData,
            this=self._parse_table(schema=True),
            local=local,
            overwrite=overwrite,
            inpath=inpath,
            partition=self._parse_partition(),
            input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
            serde=self._match_text_seq("SERDE") and self._parse_string(),
        )
1777
1778    def _parse_delete(self) -> exp.Expression:
1779        self._match(TokenType.FROM)
1780
1781        return self.expression(
1782            exp.Delete,
1783            this=self._parse_table(),
1784            using=self._parse_csv(lambda: self._match(TokenType.USING) and self._parse_table()),
1785            where=self._parse_where(),
1786            returning=self._parse_returning(),
1787        )
1788
    def _parse_update(self) -> exp.Expression:
        """Parse UPDATE <table> SET ... [FROM ...] [WHERE ...] [RETURNING ...].

        The dict literal preserves insertion order, so the clauses are parsed
        in the order they appear in the SQL text.
        """
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": self._parse_table(alias_tokens=self.UPDATE_ALIAS_TOKENS),
                "expressions": self._match(TokenType.SET) and self._parse_csv(self._parse_equality),
                "from": self._parse_from(modifiers=True),
                "where": self._parse_where(),
                "returning": self._parse_returning(),
            },
        )
1800
1801    def _parse_uncache(self) -> exp.Expression:
1802        if not self._match(TokenType.TABLE):
1803            self.raise_error("Expecting TABLE after UNCACHE")
1804
1805        return self.expression(
1806            exp.Uncache,
1807            exists=self._parse_exists(),
1808            this=self._parse_table(schema=True),
1809        )
1810
1811    def _parse_cache(self) -> exp.Expression:
1812        lazy = self._match(TokenType.LAZY)
1813        self._match(TokenType.TABLE)
1814        table = self._parse_table(schema=True)
1815        options = []
1816
1817        if self._match(TokenType.OPTIONS):
1818            self._match_l_paren()
1819            k = self._parse_string()
1820            self._match(TokenType.EQ)
1821            v = self._parse_string()
1822            options = [k, v]
1823            self._match_r_paren()
1824
1825        self._match(TokenType.ALIAS)
1826        return self.expression(
1827            exp.Cache,
1828            this=table,
1829            lazy=lazy,
1830            options=options,
1831            expression=self._parse_select(nested=True),
1832        )
1833
1834    def _parse_partition(self) -> t.Optional[exp.Expression]:
1835        if not self._match(TokenType.PARTITION):
1836            return None
1837
1838        return self.expression(
1839            exp.Partition, expressions=self._parse_wrapped_csv(self._parse_conjunction)
1840        )
1841
1842    def _parse_value(self) -> exp.Expression:
1843        if self._match(TokenType.L_PAREN):
1844            expressions = self._parse_csv(self._parse_conjunction)
1845            self._match_r_paren()
1846            return self.expression(exp.Tuple, expressions=expressions)
1847
1848        # In presto we can have VALUES 1, 2 which results in 1 column & 2 rows.
1849        # Source: https://prestodb.io/docs/current/sql/values.html
1850        return self.expression(exp.Tuple, expressions=[self._parse_conjunction()])
1851
    def _parse_select(
        self, nested: bool = False, table: bool = False, parse_subquery_alias: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT statement (or CTE-prefixed statement, parenthesized
        subquery, or VALUES clause), then any trailing set operations.

        Args:
            nested: allow a parenthesized nested select.
            table: allow a parenthesized table reference instead of a select.
            parse_subquery_alias: whether to parse an alias on a subquery.
        """
        cte = self._parse_with()
        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Attach the WITH clause to the statement if it supports one.
            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte
        elif self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()
            all_ = self._match(TokenType.ALL)
            distinct = self._match(TokenType.DISTINCT)

            # BigQuery: SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value() if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            # TOP-style limit appears before the projection list.
            limit = self._parse_limit(top=True)
            expressions = self._parse_csv(self._parse_expression)

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=expressions,
                limit=limit,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            from_ = self._parse_from()
            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_table() if table else self._parse_select(nested=True)
            this = self._parse_set_operations(self._parse_query_modifiers(this))
            self._match_r_paren()

            # early return so that subquery unions aren't parsed again
            # SELECT * FROM (SELECT 1) UNION ALL SELECT 1
            # Union ALL should be a property of the top select node, not the subquery
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES):
            this = self.expression(
                exp.Values,
                expressions=self._parse_csv(self._parse_value),
                alias=self._parse_table_alias(),
            )
        else:
            this = None

        return self._parse_set_operations(this)
1931
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a WITH (CTE) clause.

        Args:
            skip_with_token: when True, assume the WITH keyword was already consumed.
        """
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        expressions = []
        while True:
            expressions.append(self._parse_cte())

            # CTEs are separated by commas, but a repeated WITH keyword is also
            # tolerated as a separator; note the `and` short-circuits, so WITH
            # is only tried in the condition when no comma was matched.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Consume an optional WITH following a comma separator.
                self._match(TokenType.WITH)

        return self.expression(
            exp.With, comments=comments, expressions=expressions, recursive=recursive
        )
1951
1952    def _parse_cte(self) -> exp.Expression:
1953        alias = self._parse_table_alias()
1954        if not alias or not alias.this:
1955            self.raise_error("Expected CTE to have alias")
1956
1957        self._match(TokenType.ALIAS)
1958
1959        return self.expression(
1960            exp.CTE,
1961            this=self._parse_wrapped(self._parse_statement),
1962            alias=alias,
1963        )
1964
1965    def _parse_table_alias(
1966        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
1967    ) -> t.Optional[exp.Expression]:
1968        any_token = self._match(TokenType.ALIAS)
1969        alias = (
1970            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
1971            or self._parse_string_as_identifier()
1972        )
1973
1974        index = self._index
1975        if self._match(TokenType.L_PAREN):
1976            columns = self._parse_csv(self._parse_function_parameter)
1977            self._match_r_paren() if columns else self._retreat(index)
1978        else:
1979            columns = None
1980
1981        if not alias and not columns:
1982            return None
1983
1984        return self.expression(exp.TableAlias, this=alias, columns=columns)
1985
1986    def _parse_subquery(
1987        self, this: t.Optional[exp.Expression], parse_alias: bool = True
1988    ) -> exp.Expression:
1989        return self.expression(
1990            exp.Subquery,
1991            this=this,
1992            pivots=self._parse_pivots(),
1993            alias=self._parse_table_alias() if parse_alias else None,
1994        )
1995
1996    def _parse_query_modifiers(
1997        self, this: t.Optional[exp.Expression]
1998    ) -> t.Optional[exp.Expression]:
1999        if isinstance(this, self.MODIFIABLES):
2000            for key, parser in self.QUERY_MODIFIER_PARSERS.items():
2001                expression = parser(self)
2002
2003                if expression:
2004                    this.set(key, expression)
2005        return this
2006
2007    def _parse_hint(self) -> t.Optional[exp.Expression]:
2008        if self._match(TokenType.HINT):
2009            hints = self._parse_csv(self._parse_function)
2010            if not self._match_pair(TokenType.STAR, TokenType.SLASH):
2011                self.raise_error("Expected */ after HINT")
2012            return self.expression(exp.Hint, expressions=hints)
2013
2014        return None
2015
2016    def _parse_into(self) -> t.Optional[exp.Expression]:
2017        if not self._match(TokenType.INTO):
2018            return None
2019
2020        temp = self._match(TokenType.TEMPORARY)
2021        unlogged = self._match(TokenType.UNLOGGED)
2022        self._match(TokenType.TABLE)
2023
2024        return self.expression(
2025            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
2026        )
2027
2028    def _parse_from(self, modifiers: bool = False) -> t.Optional[exp.Expression]:
2029        if not self._match(TokenType.FROM):
2030            return None
2031
2032        comments = self._prev_comments
2033        this = self._parse_table()
2034
2035        return self.expression(
2036            exp.From,
2037            comments=comments,
2038            this=self._parse_query_modifiers(this) if modifiers else this,
2039        )
2040
    def _parse_match_recognize(self) -> t.Optional[exp.Expression]:
        """Parse a MATCH_RECOGNIZE(...) clause.

        Parses, in order: PARTITION BY, ORDER BY, MEASURES, the rows-per-match
        mode, AFTER MATCH SKIP, the PATTERN (scanned as raw tokens between
        balanced parentheses), and DEFINE. Returns None when the
        MATCH_RECOGNIZE keyword is absent.
        """
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()
        measures = (
            self._parse_csv(self._parse_expression) if self._match_text_seq("MEASURES") else None
        )

        # Rows-per-match mode, captured verbatim as a Var.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.Var(this="ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += f" SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += f" OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += f" WITH UNMATCHED ROWS"
            rows = exp.Var(this=text)
        else:
            rows = None

        # AFTER MATCH SKIP strategy, also captured verbatim.
        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += f" PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += f" TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.Var(this=text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # Scan the pattern as raw tokens, tracking paren depth until the
            # opening "(" is balanced; the matched SQL text becomes the pattern.
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1
                end = self._prev
                self._advance()
            if paren > 0:
                self.raise_error("Expecting )", self._curr)
            pattern = exp.Var(this=self._find_sql(start, end))
        else:
            pattern = None

        # DEFINE: comma-separated `name AS condition` aliases.
        define = (
            self._parse_csv(
                lambda: self.expression(
                    exp.Alias,
                    alias=self._parse_id_var(any_token=True),
                    this=self._match(TokenType.ALIAS) and self._parse_conjunction(),
                )
            )
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
2128
    def _parse_lateral(self) -> t.Optional[exp.Expression]:
        """Parse LATERAL, CROSS APPLY, or OUTER APPLY constructs.

        Returns None when none of the introducing keywords are present.
        """
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)

        if outer_apply or cross_apply:
            this = self._parse_select(table=True)
            view = None
            outer = not cross_apply  # OUTER APPLY maps to an outer lateral
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery: parse a (possibly dotted) function call or identifier.
            this = self._parse_function() or self._parse_id_var(any_token=False)
            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        table_alias: t.Optional[exp.Expression]

        if view:
            # Hive LATERAL VIEW: table alias followed by optional column aliases.
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias = self.expression(exp.TableAlias, this=table, columns=columns)
        else:
            table_alias = self._parse_table_alias()

        expression = self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
        )

        return expression
2170
2171    def _parse_join_side_and_kind(
2172        self,
2173    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
2174        return (
2175            self._match(TokenType.NATURAL) and self._prev,
2176            self._match_set(self.JOIN_SIDES) and self._prev,
2177            self._match_set(self.JOIN_KINDS) and self._prev,
2178        )
2179
    def _parse_join(self, skip_join_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a JOIN clause, including comma joins and CROSS/OUTER APPLY.

        Args:
            skip_join_token: when True, the JOIN keyword itself is optional.
        """
        if self._match(TokenType.COMMA):
            # Implicit cross join: FROM a, b
            return self.expression(exp.Join, this=self._parse_table())

        index = self._index
        natural, side, kind = self._parse_join_side_and_kind()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN)

        if not skip_join_token and not join:
            # No JOIN keyword: roll back any NATURAL/side/kind tokens consumed.
            self._retreat(index)
            kind = None
            natural = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        if outer_apply:
            # OUTER APPLY is treated as a LEFT join against the applied source.
            side = Token(TokenType.LEFT, "LEFT")

        kwargs: t.Dict[
            str, t.Optional[exp.Expression] | bool | str | t.List[t.Optional[exp.Expression]]
        ] = {"this": self._parse_table()}

        if natural:
            kwargs["natural"] = True
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        # Join condition: ON <predicate> or USING (<columns>).
        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_conjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_wrapped_id_vars()

        return self.expression(exp.Join, **kwargs)  # type: ignore
2223
2224    def _parse_index(self) -> exp.Expression:
2225        index = self._parse_id_var()
2226        self._match(TokenType.ON)
2227        self._match(TokenType.TABLE)  # hive
2228
2229        return self.expression(
2230            exp.Index,
2231            this=index,
2232            table=self.expression(exp.Table, this=self._parse_id_var()),
2233            columns=self._parse_expression(),
2234        )
2235
2236    def _parse_create_table_index(self) -> t.Optional[exp.Expression]:
2237        unique = self._match(TokenType.UNIQUE)
2238        primary = self._match_text_seq("PRIMARY")
2239        amp = self._match_text_seq("AMP")
2240        if not self._match(TokenType.INDEX):
2241            return None
2242        index = self._parse_id_var()
2243        columns = None
2244        if self._match(TokenType.L_PAREN, advance=False):
2245            columns = self._parse_wrapped_csv(self._parse_column)
2246        return self.expression(
2247            exp.Index,
2248            this=index,
2249            columns=columns,
2250            unique=unique,
2251            primary=primary,
2252            amp=amp,
2253        )
2254
2255    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
2256        return (
2257            (not schema and self._parse_function())
2258            or self._parse_id_var(any_token=False)
2259            or self._parse_string_as_identifier()
2260            or self._parse_placeholder()
2261        )
2262
2263    def _parse_table_parts(self, schema: bool = False) -> exp.Expression:
2264        catalog = None
2265        db = None
2266        table = self._parse_table_part(schema=schema)
2267
2268        while self._match(TokenType.DOT):
2269            if catalog:
2270                # This allows nesting the table in arbitrarily many dot expressions if needed
2271                table = self.expression(
2272                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
2273                )
2274            else:
2275                catalog = db
2276                db = table
2277                table = self._parse_table_part(schema=schema)
2278
2279        if not table:
2280            self.raise_error(f"Expected table name but got {self._curr}")
2281
2282        return self.expression(
2283            exp.Table, this=table, db=db, catalog=catalog, pivots=self._parse_pivots()
2284        )
2285
    def _parse_table(
        self, schema: bool = False, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a table factor: lateral, unnest, VALUES, subquery, or a table name
        with optional alias, pivots, hints, and TABLESAMPLE.

        Args:
            schema: parse the table as a schema (column definitions allowed).
            alias_tokens: token types accepted as an alias; defaults to
                TABLE_ALIAS_TOKENS.
        """
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        this = self._parse_table_parts(schema=schema)

        if schema:
            return self._parse_schema(this=this)

        # Some dialects put TABLESAMPLE before the alias, some after; exactly
        # one of these two branches assigns table_sample.
        if self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        # T-SQL style table hints: WITH (NOLOCK, ...)
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            this.set(
                "hints",
                self._parse_csv(lambda: self._parse_function() or self._parse_var(any_token=True)),
            )
            self._match_r_paren()

        if not self.alias_post_tablesample:
            table_sample = self._parse_table_sample()

        if table_sample:
            table_sample.set("this", this)
            this = table_sample

        return this
2337
    def _parse_unnest(self) -> t.Optional[exp.Expression]:
        """Parse an UNNEST(...) table factor with optional ORDINALITY, alias,
        and WITH OFFSET clauses. Returns None when UNNEST is absent."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_type)
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        alias = self._parse_table_alias()

        if alias and self.unnest_column_only:
            # Dialects like BigQuery alias the unnested column, not the table:
            # move the parsed table alias into the columns slot.
            if alias.args.get("columns"):
                self.raise_error("Unexpected extra column alias in unnest.")
            alias.set("columns", [alias.this])
            alias.set("this", None)

        offset = None
        if self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            # "offset" is the implicit column name when no alias is given.
            offset = self._parse_id_var() or exp.Identifier(this="offset")

        return self.expression(
            exp.Unnest,
            expressions=expressions,
            ordinality=ordinality,
            alias=alias,
            offset=offset,
        )
2364
2365    def _parse_derived_table_values(self) -> t.Optional[exp.Expression]:
2366        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
2367        if not is_derived and not self._match(TokenType.VALUES):
2368            return None
2369
2370        expressions = self._parse_csv(self._parse_value)
2371
2372        if is_derived:
2373            self._match_r_paren()
2374
2375        return self.expression(exp.Values, expressions=expressions, alias=self._parse_table_alias())
2376
2377    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.Expression]:
2378        if not self._match(TokenType.TABLE_SAMPLE) and not (
2379            as_modifier and self._match_text_seq("USING", "SAMPLE")
2380        ):
2381            return None
2382
2383        bucket_numerator = None
2384        bucket_denominator = None
2385        bucket_field = None
2386        percent = None
2387        rows = None
2388        size = None
2389        seed = None
2390
2391        kind = (
2392            self._prev.text if self._prev.token_type == TokenType.TABLE_SAMPLE else "USING SAMPLE"
2393        )
2394        method = self._parse_var(tokens=(TokenType.ROW,))
2395
2396        self._match(TokenType.L_PAREN)
2397
2398        num = self._parse_number()
2399
2400        if self._match(TokenType.BUCKET):
2401            bucket_numerator = self._parse_number()
2402            self._match(TokenType.OUT_OF)
2403            bucket_denominator = bucket_denominator = self._parse_number()
2404            self._match(TokenType.ON)
2405            bucket_field = self._parse_field()
2406        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
2407            percent = num
2408        elif self._match(TokenType.ROWS):
2409            rows = num
2410        else:
2411            size = num
2412
2413        self._match(TokenType.R_PAREN)
2414
2415        if self._match(TokenType.L_PAREN):
2416            method = self._parse_var()
2417            seed = self._match(TokenType.COMMA) and self._parse_number()
2418            self._match_r_paren()
2419        elif self._match_texts(("SEED", "REPEATABLE")):
2420            seed = self._parse_wrapped(self._parse_number)
2421
2422        return self.expression(
2423            exp.TableSample,
2424            method=method,
2425            bucket_numerator=bucket_numerator,
2426            bucket_denominator=bucket_denominator,
2427            bucket_field=bucket_field,
2428            percent=percent,
2429            rows=rows,
2430            size=size,
2431            seed=seed,
2432            kind=kind,
2433        )
2434
2435    def _parse_pivots(self) -> t.List[t.Optional[exp.Expression]]:
2436        return list(iter(self._parse_pivot, None))
2437
    def _parse_pivot(self) -> t.Optional[exp.Expression]:
        """Parse one PIVOT/UNPIVOT clause, or return None (rewinding) if absent.

        For PIVOT, also computes the names of the generated output columns from
        the aggregation aliases combined with each IN-field value.
        """
        index = self._index

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True
        else:
            return None

        expressions = []
        field = None

        if not self._match(TokenType.L_PAREN):
            # Not actually a pivot clause: undo the PIVOT/UNPIVOT token consumption.
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            # PIVOT takes aggregation function calls, each optionally aliased.
            expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function()))

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        field = self._parse_in(value)

        self._match_r_paren()

        pivot = self.expression(exp.Pivot, expressions=expressions, field=field, unpivot=unpivot)

        # Only attach a table alias when this is the last pivot in a chain.
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            # Build one output column name per (IN-field value, aggregation name)
            # pair; ordering controlled by PREFIXED_PIVOT_COLUMNS, spelling of the
            # field part by IDENTIFY_PIVOT_STRINGS.
            columns: t.List[exp.Expression] = []
            for fld in pivot.args["field"].expressions:
                field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                for name in names:
                    if self.PREFIXED_PIVOT_COLUMNS:
                        name = f"{name}_{field_name}" if name else field_name
                    else:
                        name = f"{field_name}_{name}" if name else field_name

                    columns.append(exp.to_identifier(name))

            pivot.set("columns", columns)

        return pivot
2497
2498    def _pivot_column_names(self, pivot_columns: t.List[exp.Expression]) -> t.List[str]:
2499        return [agg.alias for agg in pivot_columns]
2500
2501    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Expression]:
2502        if not skip_where_token and not self._match(TokenType.WHERE):
2503            return None
2504
2505        return self.expression(
2506            exp.Where, comments=self._prev_comments, this=self._parse_conjunction()
2507        )
2508
    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Expression]:
        """Parse a GROUP BY clause, including GROUPING SETS, ROLLUP, CUBE and WITH TOTALS.

        Loops so plain grouping expressions can be interleaved with grouping
        constructs; stops once a pass finds no further grouping construct.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None

        elements = defaultdict(list)

        while True:
            expressions = self._parse_csv(self._parse_conjunction)
            if expressions:
                elements["expressions"].extend(expressions)

            grouping_sets = self._parse_grouping_sets()
            if grouping_sets:
                elements["grouping_sets"].extend(grouping_sets)

            rollup = None
            cube = None
            totals = None

            # WITH ROLLUP / WITH CUBE store the bare boolean True; otherwise
            # ROLLUP/CUBE take a parenthesized column list.
            with_ = self._match(TokenType.WITH)
            if self._match(TokenType.ROLLUP):
                rollup = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["rollup"].extend(ensure_list(rollup))

            if self._match(TokenType.CUBE):
                cube = with_ or self._parse_wrapped_csv(self._parse_column)
                elements["cube"].extend(ensure_list(cube))

            if self._match_text_seq("TOTALS"):
                totals = True
                elements["totals"] = True  # type: ignore

            if not (grouping_sets or rollup or cube or totals):
                break

        return self.expression(exp.Group, **elements)  # type: ignore
2545
2546    def _parse_grouping_sets(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
2547        if not self._match(TokenType.GROUPING_SETS):
2548            return None
2549
2550        return self._parse_wrapped_csv(self._parse_grouping_set)
2551
2552    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
2553        if self._match(TokenType.L_PAREN):
2554            grouping_set = self._parse_csv(self._parse_column)
2555            self._match_r_paren()
2556            return self.expression(exp.Tuple, expressions=grouping_set)
2557
2558        return self._parse_column()
2559
2560    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Expression]:
2561        if not skip_having_token and not self._match(TokenType.HAVING):
2562            return None
2563        return self.expression(exp.Having, this=self._parse_conjunction())
2564
2565    def _parse_qualify(self) -> t.Optional[exp.Expression]:
2566        if not self._match(TokenType.QUALIFY):
2567            return None
2568        return self.expression(exp.Qualify, this=self._parse_conjunction())
2569
2570    def _parse_order(
2571        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
2572    ) -> t.Optional[exp.Expression]:
2573        if not skip_order_token and not self._match(TokenType.ORDER_BY):
2574            return this
2575
2576        return self.expression(
2577            exp.Order, this=this, expressions=self._parse_csv(self._parse_ordered)
2578        )
2579
2580    def _parse_sort(
2581        self, token_type: TokenType, exp_class: t.Type[exp.Expression]
2582    ) -> t.Optional[exp.Expression]:
2583        if not self._match(token_type):
2584            return None
2585        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))
2586
    def _parse_ordered(self) -> exp.Expression:
        """Parse one ORDER BY key: expression plus ASC/DESC and NULLS FIRST/LAST.

        When null ordering isn't written explicitly, `nulls_first` is inferred from
        the dialect's `null_ordering` setting so output can be normalized.
        """
        this = self._parse_conjunction()
        self._match(TokenType.ASC)  # ASC is the default; consume it silently
        is_desc = self._match(TokenType.DESC)
        is_nulls_first = self._match(TokenType.NULLS_FIRST)
        is_nulls_last = self._match(TokenType.NULLS_LAST)
        desc = is_desc or False
        asc = not desc
        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last
        # "nulls_are_small" dialects put NULLs first when ascending; other
        # dialects put them first when descending — unless "nulls_are_last".
        if (
            not explicitly_null_ordered
            and (
                (asc and self.null_ordering == "nulls_are_small")
                or (desc and self.null_ordering != "nulls_are_small")
            )
            and self.null_ordering != "nulls_are_last"
        ):
            nulls_first = True

        return self.expression(exp.Ordered, this=this, desc=desc, nulls_first=nulls_first)
2608
    def _parse_limit(
        self, this: t.Optional[exp.Expression] = None, top: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT (or TOP when `top=True`), or a FETCH [FIRST|NEXT] clause.

        Returns `this` unchanged when neither construct is present.
        """
        if self._match(TokenType.TOP if top else TokenType.LIMIT):
            # The limit count may be parenthesized.
            limit_paren = self._match(TokenType.L_PAREN)
            limit_exp = self.expression(
                exp.Limit, this=this, expression=self._parse_number() if top else self._parse_term()
            )

            if limit_paren:
                self._match_r_paren()

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text if direction else "FIRST"

            count = self._parse_number()
            percent = self._match(TokenType.PERCENT)

            self._match_set((TokenType.ROW, TokenType.ROWS))

            only = self._match(TokenType.ONLY)
            with_ties = self._match_text_seq("WITH", "TIES")

            # ONLY and WITH TIES are mutually exclusive in FETCH.
            if only and with_ties:
                self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause")

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                percent=percent,
                with_ties=with_ties,
            )

        return this
2647
2648    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
2649        if not self._match_set((TokenType.OFFSET, TokenType.COMMA)):
2650            return this
2651
2652        count = self._parse_number()
2653        self._match_set((TokenType.ROW, TokenType.ROWS))
2654        return self.expression(exp.Offset, this=this, expression=count)
2655
2656    def _parse_lock(self) -> t.Optional[exp.Expression]:
2657        if self._match_text_seq("FOR", "UPDATE"):
2658            return self.expression(exp.Lock, update=True)
2659        if self._match_text_seq("FOR", "SHARE"):
2660            return self.expression(exp.Lock, update=False)
2661
2662        return None
2663
    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse trailing UNION/EXCEPT/INTERSECT operations onto `this` (right-recursive)."""
        if not self._match_set(self.SET_OPERATIONS):
            return this

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            expression = exp.Union
        elif token_type == TokenType.EXCEPT:
            expression = exp.Except
        else:
            expression = exp.Intersect

        return self.expression(
            expression,
            this=this,
            # DISTINCT is the default; only an explicit ALL turns it off. The
            # short-circuit order here matters: ALL is only consumed when
            # DISTINCT didn't match.
            distinct=self._match(TokenType.DISTINCT) or not self._match(TokenType.ALL),
            expression=self._parse_set_operations(self._parse_select(nested=True)),
        )
2683
2684    def _parse_expression(self, explicit_alias: bool = False) -> t.Optional[exp.Expression]:
2685        return self._parse_alias(self._parse_conjunction(), explicit=explicit_alias)
2686
2687    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
2688        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)
2689
2690    def _parse_equality(self) -> t.Optional[exp.Expression]:
2691        return self._parse_tokens(self._parse_comparison, self.EQUALITY)
2692
2693    def _parse_comparison(self) -> t.Optional[exp.Expression]:
2694        return self._parse_tokens(self._parse_range, self.COMPARISON)
2695
    def _parse_range(self) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (RANGE_PARSERS, ISNULL/NOTNULL, trailing IS)."""
        this = self._parse_bitwise()
        negate = self._match(TokenType.NOT)  # e.g. NOT BETWEEN / NOT IN / NOT LIKE

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            # NOTNULL is sugar for NOT (x IS NULL).
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self.expression(exp.Not, this=this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this
2722
    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: [NOT] DISTINCT FROM / NULL / boolean.

        Rewinds (returning None) when what follows IS isn't a recognized operand,
        so the caller can keep the pre-IS expression.
        """
        index = self._index - 1  # points at the already-consumed IS token
        negate = self._match(TokenType.NOT)
        if self._match(TokenType.DISTINCT_FROM):
            # IS [NOT] DISTINCT FROM maps to null-safe (in)equality.
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_expression())

        expression = self._parse_null() or self._parse_boolean()
        if not expression:
            self._retreat(index)
            return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this
2737
    def _parse_in(self, this: t.Optional[exp.Expression]) -> exp.Expression:
        """Parse the operand of IN: an UNNEST, a parenthesized (sub)query or value
        list, or a bare field."""
        unnest = self._parse_unnest()
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_select_or_expression)

            # A lone subquery is stored under "query"; otherwise keep the value list.
            if len(expressions) == 1 and isinstance(expressions[0], exp.Subqueryable):
                this = self.expression(exp.In, this=this, query=expressions[0])
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            self._match_r_paren(this)
        else:
            this = self.expression(exp.In, this=this, field=self._parse_field())

        return this
2755
2756    def _parse_between(self, this: exp.Expression) -> exp.Expression:
2757        low = self._parse_bitwise()
2758        self._match(TokenType.AND)
2759        high = self._parse_bitwise()
2760        return self.expression(exp.Between, this=this, low=low, high=high)
2761
2762    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2763        if not self._match(TokenType.ESCAPE):
2764            return this
2765        return self.expression(exp.Escape, this=this, expression=self._parse_string())
2766
    def _parse_interval(self) -> t.Optional[exp.Expression]:
        """Parse an INTERVAL expression, normalizing literals toward INTERVAL '<n>' <unit>."""
        if not self._match(TokenType.INTERVAL):
            return None

        this = self._parse_primary() or self._parse_term()
        unit = self._parse_function() or self._parse_var()

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and isinstance(this, exp.Literal):
            if this.is_number:
                # Numeric interval values are canonicalized to string literals.
                this = exp.Literal.string(this.name)

            # Try to not clutter Snowflake's multi-part intervals like INTERVAL '1 day, 1 year'
            parts = this.name.split()
            if not unit and len(parts) <= 2:
                # Split "5 day" into value '5' and unit `day`.
                this = exp.Literal.string(seq_get(parts, 0))
                unit = self.expression(exp.Var, this=seq_get(parts, 1))

        return self.expression(exp.Interval, this=this, unit=unit)
2787
2788    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
2789        this = self._parse_term()
2790
2791        while True:
2792            if self._match_set(self.BITWISE):
2793                this = self.expression(
2794                    self.BITWISE[self._prev.token_type],
2795                    this=this,
2796                    expression=self._parse_term(),
2797                )
2798            elif self._match_pair(TokenType.LT, TokenType.LT):
2799                this = self.expression(
2800                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
2801                )
2802            elif self._match_pair(TokenType.GT, TokenType.GT):
2803                this = self.expression(
2804                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
2805                )
2806            else:
2807                break
2808
2809        return this
2810
2811    def _parse_term(self) -> t.Optional[exp.Expression]:
2812        return self._parse_tokens(self._parse_factor, self.TERM)
2813
2814    def _parse_factor(self) -> t.Optional[exp.Expression]:
2815        return self._parse_tokens(self._parse_unary, self.FACTOR)
2816
2817    def _parse_unary(self) -> t.Optional[exp.Expression]:
2818        if self._match_set(self.UNARY_PARSERS):
2819            return self.UNARY_PARSERS[self._prev.token_type](self)
2820        return self._parse_at_time_zone(self._parse_type())
2821
    def _parse_type(self) -> t.Optional[exp.Expression]:
        """Parse an interval, a `<type> <literal>` cast form, a bare type, or a column."""
        interval = self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True)
        this = self._parse_column()

        if data_type:
            if isinstance(this, exp.Literal):
                # `<type> <literal>` parses as a cast, unless a dialect-specific
                # literal parser is registered for this type.
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)
                return self.expression(exp.Cast, this=this, to=data_type)
            if not data_type.expressions:
                # A bare non-parameterized type was likely a column name after
                # all; rewind and reparse it as a column.
                self._retreat(index)
                return self._parse_column()
            return data_type

        return this
2843
2844    def _parse_type_size(self) -> t.Optional[exp.Expression]:
2845        this = self._parse_type()
2846        if not this:
2847            return None
2848
2849        return self.expression(
2850            exp.DataTypeSize, this=this, expression=self._parse_var(any_token=True)
2851        )
2852
    def _parse_types(self, check_func: bool = False) -> t.Optional[exp.Expression]:
        """Parse a data type: nested (STRUCT/angle-bracket), sized, array-suffixed,
        timestamp/time-zone and INTERVAL forms.

        When `check_func` is True, a parenthesized type NOT followed by a string
        literal is assumed to be a function call instead, and parsing rewinds
        (returns None).
        """
        index = self._index

        # Optional "SYSUDTLIB." prefix; recorded on the node so it round-trips.
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if not self._match_set(self.TYPE_TOKENS):
            return None

        type_token = self._prev.token_type

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text)

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token == TokenType.STRUCT
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            elif nested:
                expressions = self._parse_csv(self._parse_types)
            else:
                expressions = self._parse_csv(self._parse_type_size)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Parenthesized args mean this could still be a function call.
            maybe_func = True

        # `<type>[]` (possibly repeated) builds nested ARRAY types.
        if self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
            this = exp.DataType(
                this=exp.DataType.Type.ARRAY,
                expressions=[exp.DataType.build(type_token.value, expressions=expressions)],
                nested=True,
            )

            while self._match_pair(TokenType.L_BRACKET, TokenType.R_BRACKET):
                this = exp.DataType(
                    this=exp.DataType.Type.ARRAY,
                    expressions=[this],
                    nested=True,
                )

            return this

        if self._match(TokenType.L_BRACKET):
            # A lone '[' without a matching ']' isn't a type suffix; rewind entirely.
            self._retreat(index)
            return None

        values: t.Optional[t.List[t.Optional[exp.Expression]]] = None
        if nested and self._match(TokenType.LT):
            # Angle-bracket syntax for nested types, e.g. ARRAY<INT>.
            if is_struct:
                expressions = self._parse_csv(self._parse_struct_types)
            else:
                expressions = self._parse_csv(self._parse_types)

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values immediately after the type.
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_conjunction)
                self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        value: t.Optional[exp.Expression] = None
        if type_token in self.TIMESTAMPS:
            # Normalize WITH [LOCAL] TIME ZONE / WITHOUT TIME ZONE variants.
            if self._match(TokenType.WITH_TIME_ZONE) or type_token == TokenType.TIMESTAMPTZ:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPTZ, expressions=expressions)
            elif (
                self._match(TokenType.WITH_LOCAL_TIME_ZONE) or type_token == TokenType.TIMESTAMPLTZ
            ):
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match(TokenType.WITHOUT_TIME_ZONE):
                if type_token == TokenType.TIME:
                    value = exp.DataType(this=exp.DataType.Type.TIME, expressions=expressions)
                else:
                    value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)

            # An explicit time-zone suffix rules out a function call.
            maybe_func = maybe_func and value is None

            if value is None:
                value = exp.DataType(this=exp.DataType.Type.TIMESTAMP, expressions=expressions)
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var()

            if not unit:
                value = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
            else:
                value = self.expression(exp.Interval, unit=unit)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                # No string literal follows: treat as a function call, not a type.
                self._retreat(index)
                return None

            self._retreat(index2)

        if value:
            return value

        return exp.DataType(
            this=exp.DataType.Type[type_token.value.upper()],
            expressions=expressions,
            nested=nested,
            values=values,
            prefix=prefix,
        )
2965
2966    def _parse_struct_types(self) -> t.Optional[exp.Expression]:
2967        this = self._parse_type() or self._parse_id_var()
2968        self._match(TokenType.COLON)
2969        return self._parse_column_def(this)
2970
2971    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
2972        if not self._match(TokenType.AT_TIME_ZONE):
2973            return this
2974        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())
2975
    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column reference, applying column operators.

        Handles dotted qualification, `::` casts, bracket subscripts, and any
        dialect-specific operators registered in COLUMN_OPERATORS.
        """
        this = self._parse_field()
        if isinstance(this, exp.Identifier):
            this = self.expression(exp.Column, this=this)
        elif not this:
            return self._parse_bracket(this)
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token == TokenType.DCOLON:
                # `expr::type` cast: the right-hand side is a type.
                field = self._parse_types()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                # Operators that take a raw literal operand: consume one token.
                self._advance()
                value = self._prev.text
                field = (
                    exp.Literal.number(value)
                    if self._prev.token_type == TokenType.NUMBER
                    else exp.Literal.string(value)
                )
            else:
                field = (
                    self._parse_star()
                    or self._parse_function(anonymous=True)
                    or self._parse_id_var()
                )

            if isinstance(field, exp.Func):
                # bigquery allows function calls like x.y.count(...)
                # SAFE.SUBSTR(...)
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
                this = self._replace_columns_with_dots(this)

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: what was parsed as the column
                # becomes the table, table becomes db, db becomes catalog.
                this = self.expression(
                    exp.Column,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            else:
                this = self.expression(exp.Dot, this=this, expression=field)
            this = self._parse_bracket(this)

        return this
3028
    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a registered literal, a leading-dot number,
        or a parenthesized expression/subquery/tuple. Returns None when nothing
        matches."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals are folded into a single Concat.
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))
                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)
            return primary

        # `.5`-style numbers with no leading digit.
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        if self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            query = self._parse_select()

            if query:
                expressions = [query]
            else:
                expressions = self._parse_csv(lambda: self._parse_expression(explicit_alias=True))

            this = self._parse_query_modifiers(seq_get(expressions, 0))

            if isinstance(this, exp.Subqueryable):
                # Parenthesized query: wrap as a subquery and allow set operations.
                this = self._parse_set_operations(
                    self._parse_subquery(this=this, parse_alias=False)
                )
            elif len(expressions) > 1:
                # Multiple comma-separated expressions form a tuple.
                this = self.expression(exp.Tuple, expressions=expressions)
            else:
                this = self.expression(exp.Paren, this=self._parse_set_operations(this))

            if this:
                this.add_comments(comments)
            self._match_r_paren(expression=this)

            return this

        return None
3072
3073    def _parse_field(
3074        self,
3075        any_token: bool = False,
3076        tokens: t.Optional[t.Collection[TokenType]] = None,
3077    ) -> t.Optional[exp.Expression]:
3078        return (
3079            self._parse_primary()
3080            or self._parse_function()
3081            or self._parse_id_var(any_token=any_token, tokens=tokens)
3082        )
3083
    def _parse_function(
        self, functions: t.Optional[t.Dict[str, t.Callable]] = None, anonymous: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a function call.

        Resolution order: NO_PAREN_FUNCTION_PARSERS, NO_PAREN_FUNCTIONS (when no
        paren follows), dedicated FUNCTION_PARSERS, subquery predicates, then the
        `functions` registry (defaults to self.FUNCTIONS); unknown names become
        exp.Anonymous. When `anonymous` is True, registered parsers/builders are
        bypassed.
        """
        if not self._curr:
            return None

        token_type = self._curr.token_type

        if self._match_set(self.NO_PAREN_FUNCTION_PARSERS):
            return self.NO_PAREN_FUNCTION_PARSERS[token_type](self)

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if token_type not in self.FUNC_TOKENS:
            return None

        this = self._curr.text
        upper = this.upper()
        self._advance(2)  # consume the function name and the opening paren

        parser = self.FUNCTION_PARSERS.get(upper)

        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                this = self.expression(subquery_predicate, this=self._parse_select())
                self._match_r_paren()
                return this

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            args = self._parse_csv(self._parse_lambda)

            if function and not anonymous:
                this = function(args)
                self.validate_expression(this, args)
            else:
                this = self.expression(exp.Anonymous, this=this, expressions=args)

        self._match_r_paren(this)
        # A function call may be followed by a window specification.
        return self._parse_window(this)
3135
3136    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
3137        return self._parse_column_def(self._parse_id_var())
3138
3139    def _parse_user_defined_function(
3140        self, kind: t.Optional[TokenType] = None
3141    ) -> t.Optional[exp.Expression]:
3142        this = self._parse_id_var()
3143
3144        while self._match(TokenType.DOT):
3145            this = self.expression(exp.Dot, this=this, expression=self._parse_id_var())
3146
3147        if not self._match(TokenType.L_PAREN):
3148            return this
3149
3150        expressions = self._parse_csv(self._parse_function_parameter)
3151        self._match_r_paren()
3152        return self.expression(
3153            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
3154        )
3155
3156    def _parse_introducer(self, token: Token) -> t.Optional[exp.Expression]:
3157        literal = self._parse_primary()
3158        if literal:
3159            return self.expression(exp.Introducer, this=token.text, expression=literal)
3160
3161        return self.expression(exp.Identifier, this=token.text)
3162
3163    def _parse_national(self, token: Token) -> exp.Expression:
3164        return self.expression(exp.National, this=exp.Literal.string(token.text))
3165
3166    def _parse_session_parameter(self) -> exp.Expression:
3167        kind = None
3168        this = self._parse_id_var() or self._parse_primary()
3169
3170        if this and self._match(TokenType.DOT):
3171            kind = this.name
3172            this = self._parse_var() or self._parse_primary()
3173
3174        return self.expression(exp.SessionParameter, this=this, kind=kind)
3175
    def _parse_lambda(self) -> t.Optional[exp.Expression]:
        """Parse a function argument: a lambda if one follows, else DISTINCT(...) or
        a select/expression, with optional respect/ignore-nulls, ORDER BY and
        LIMIT suffixes."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(self._parse_id_var)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_id_var()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # No lambda operator followed: rewind and parse a regular argument instead.
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_conjunction)
            )
        else:
            this = self._parse_select_or_expression()

            if isinstance(this, exp.EQ):
                left = this.this
                if isinstance(left, exp.Column):
                    # The LHS of `=` inside a function call becomes a Var, not a Column.
                    left.replace(exp.Var(this=left.text("this")))

        return self._parse_limit(self._parse_order(self._parse_respect_or_ignore_nulls(this)))
3207
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized schema (column defs and/or constraints) attached to `this`."""
        index = self._index

        try:
            # Speculatively try a nested SELECT; if one parses, `this` has no schema.
            if self._parse_select(nested=True):
                return this
        except Exception:
            # NOTE(review): broad catch swallows any speculative-parse failure;
            # narrowing to ParseError may be safer — confirm before changing.
            pass
        finally:
            # Always rewind: the speculative parse must not consume tokens.
            self._retreat(index)

        if not self._match(TokenType.L_PAREN):
            return this

        args = self._parse_csv(
            lambda: self._parse_constraint()
            or self._parse_column_def(self._parse_field(any_token=True))
        )
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)
3228
3229    def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3230        # column defs are not really columns, they're identifiers
3231        if isinstance(this, exp.Column):
3232            this = this.this
3233        kind = self._parse_types()
3234
3235        if self._match_text_seq("FOR", "ORDINALITY"):
3236            return self.expression(exp.ColumnDef, this=this, ordinality=True)
3237
3238        constraints = []
3239        while True:
3240            constraint = self._parse_column_constraint()
3241            if not constraint:
3242                break
3243            constraints.append(constraint)
3244
3245        if not kind and not constraints:
3246            return this
3247
3248        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
3249
3250    def _parse_auto_increment(self) -> exp.Expression:
3251        start = None
3252        increment = None
3253
3254        if self._match(TokenType.L_PAREN, advance=False):
3255            args = self._parse_wrapped_csv(self._parse_bitwise)
3256            start = seq_get(args, 0)
3257            increment = seq_get(args, 1)
3258        elif self._match_text_seq("START"):
3259            start = self._parse_bitwise()
3260            self._match_text_seq("INCREMENT")
3261            increment = self._parse_bitwise()
3262
3263        if start and increment:
3264            return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment)
3265
3266        return exp.AutoIncrementColumnConstraint()
3267
3268    def _parse_compress(self) -> exp.Expression:
3269        if self._match(TokenType.L_PAREN, advance=False):
3270            return self.expression(
3271                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
3272            )
3273
3274        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())
3275
    def _parse_generated_as_identity(self) -> exp.Expression:
        """Parse GENERATED {ALWAYS | BY DEFAULT [ON NULL]} AS IDENTITY [(seq options)]."""
        if self._match(TokenType.BY_DEFAULT):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match_text_seq("AS", "IDENTITY")
        if self._match(TokenType.L_PAREN):
            # Optional sequence options, e.g. (START WITH 1 INCREMENT BY 1 CYCLE).
            if self._match_text_seq("START", "WITH"):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            self._match_r_paren()

        return this
3305
3306    def _parse_inline(self) -> t.Optional[exp.Expression]:
3307        self._match_text_seq("LENGTH")
3308        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())
3309
3310    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
3311        if self._match_text_seq("NULL"):
3312            return self.expression(exp.NotNullColumnConstraint)
3313        if self._match_text_seq("CASESPECIFIC"):
3314            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
3315        return None
3316
3317    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
3318        if self._match(TokenType.CONSTRAINT):
3319            this = self._parse_id_var()
3320        else:
3321            this = None
3322
3323        if self._match_texts(self.CONSTRAINT_PARSERS):
3324            return self.expression(
3325                exp.ColumnConstraint,
3326                this=this,
3327                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
3328            )
3329
3330        return this
3331
3332    def _parse_constraint(self) -> t.Optional[exp.Expression]:
3333        if not self._match(TokenType.CONSTRAINT):
3334            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
3335
3336        this = self._parse_id_var()
3337        expressions = []
3338
3339        while True:
3340            constraint = self._parse_unnamed_constraint() or self._parse_function()
3341            if not constraint:
3342                break
3343            expressions.append(constraint)
3344
3345        return self.expression(exp.Constraint, this=this, expressions=expressions)
3346
3347    def _parse_unnamed_constraint(
3348        self, constraints: t.Optional[t.Collection[str]] = None
3349    ) -> t.Optional[exp.Expression]:
3350        if not self._match_texts(constraints or self.CONSTRAINT_PARSERS):
3351            return None
3352
3353        constraint = self._prev.text.upper()
3354        if constraint not in self.CONSTRAINT_PARSERS:
3355            self.raise_error(f"No parser found for schema constraint {constraint}.")
3356
3357        return self.CONSTRAINT_PARSERS[constraint](self)
3358
3359    def _parse_unique(self) -> exp.Expression:
3360        if not self._match(TokenType.L_PAREN, advance=False):
3361            return self.expression(exp.UniqueColumnConstraint)
3362        return self.expression(exp.Unique, expressions=self._parse_wrapped_id_vars())
3363
    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as raw strings (e.g. ``ON DELETE CASCADE``)."""
        options = []
        while True:
            if not self._curr:
                # End of the token stream.
                break

            if self._match(TokenType.ON):
                action = None
                # The word after ON (e.g. DELETE / UPDATE) is kept verbatim.
                on = self._advance_any() and self._prev.text

                if self._match(TokenType.NO_ACTION):
                    action = "NO ACTION"
                elif self._match(TokenType.CASCADE):
                    action = "CASCADE"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            elif self._match_text_seq("NOT", "ENFORCED"):
                options.append("NOT ENFORCED")
            elif self._match_text_seq("DEFERRABLE"):
                options.append("DEFERRABLE")
            elif self._match_text_seq("INITIALLY", "DEFERRED"):
                options.append("INITIALLY DEFERRED")
            elif self._match_text_seq("NORELY"):
                options.append("NORELY")
            elif self._match_text_seq("MATCH", "FULL"):
                options.append("MATCH FULL")
            else:
                # Unrecognized token: options are done.
                break

        return options
3400
3401    def _parse_references(self, match=True) -> t.Optional[exp.Expression]:
3402        if match and not self._match(TokenType.REFERENCES):
3403            return None
3404
3405        expressions = None
3406        this = self._parse_id_var()
3407
3408        if self._match(TokenType.L_PAREN, advance=False):
3409            expressions = self._parse_wrapped_id_vars()
3410
3411        options = self._parse_key_constraint_options()
3412        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
3413
    def _parse_foreign_key(self) -> exp.Expression:
        """Parse FOREIGN KEY (cols) [REFERENCES ...] with ON DELETE/UPDATE actions."""
        expressions = self._parse_wrapped_id_vars()
        reference = self._parse_references()
        options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match(TokenType.NO_ACTION):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single token (e.g. CASCADE) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            options[kind] = action

        return self.expression(
            exp.ForeignKey, expressions=expressions, reference=reference, **options  # type: ignore
        )
3439
3440    def _parse_primary_key(self) -> exp.Expression:
3441        desc = (
3442            self._match_set((TokenType.ASC, TokenType.DESC))
3443            and self._prev.token_type == TokenType.DESC
3444        )
3445
3446        if not self._match(TokenType.L_PAREN, advance=False):
3447            return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc)
3448
3449        expressions = self._parse_wrapped_csv(self._parse_field)
3450        options = self._parse_key_constraint_options()
3451        return self.expression(exp.PrimaryKey, expressions=expressions, options=options)
3452
    def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse ``[...]`` (array/subscript) or ``{...}`` (struct) following `this`."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        bracket_kind = self._prev.token_type
        expressions: t.List[t.Optional[exp.Expression]]

        if self._match(TokenType.COLON):
            # Leading colon means an open-start slice, e.g. x[:n].
            expressions = [self.expression(exp.Slice, expression=self._parse_conjunction())]
        else:
            expressions = self._parse_csv(lambda: self._parse_slice(self._parse_conjunction()))

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(exp.Struct, expressions=expressions)
        elif not this or this.name.upper() == "ARRAY":
            this = self.expression(exp.Array, expressions=expressions)
        else:
            # Subscript access: normalize indices by the dialect's index offset.
            expressions = apply_index_offset(this, expressions, -self.index_offset)
            this = self.expression(exp.Bracket, this=this, expressions=expressions)

        if not self._match(TokenType.R_BRACKET) and bracket_kind == TokenType.L_BRACKET:
            self.raise_error("Expected ]")
        elif not self._match(TokenType.R_BRACE) and bracket_kind == TokenType.L_BRACE:
            self.raise_error("Expected }")

        self._add_comments(this)
        # Recurse to support chained subscripts, e.g. x[0][1].
        return self._parse_bracket(this)
3481
3482    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
3483        if self._match(TokenType.COLON):
3484            return self.expression(exp.Slice, this=this, expression=self._parse_conjunction())
3485        return this
3486
3487    def _parse_case(self) -> t.Optional[exp.Expression]:
3488        ifs = []
3489        default = None
3490
3491        expression = self._parse_conjunction()
3492
3493        while self._match(TokenType.WHEN):
3494            this = self._parse_conjunction()
3495            self._match(TokenType.THEN)
3496            then = self._parse_conjunction()
3497            ifs.append(self.expression(exp.If, this=this, true=then))
3498
3499        if self._match(TokenType.ELSE):
3500            default = self._parse_conjunction()
3501
3502        if not self._match(TokenType.END):
3503            self.raise_error("Expected END after CASE", self._prev)
3504
3505        return self._parse_window(
3506            self.expression(exp.Case, this=expression, ifs=ifs, default=default)
3507        )
3508
    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, either function-style IF(cond, a, b) or IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(self._parse_conjunction)
            this = exp.If.from_arg_list(args)
            self.validate_expression(this, args)
            self._match_r_paren()
        else:
            # Statement-style: rewind to before IF when no condition follows.
            index = self._index - 1
            condition = self._parse_conjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_conjunction()
            false = self._parse_conjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return self._parse_window(this)
3530
3531    def _parse_extract(self) -> exp.Expression:
3532        this = self._parse_function() or self._parse_var() or self._parse_type()
3533
3534        if self._match(TokenType.FROM):
3535            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3536
3537        if not self._match(TokenType.COMMA):
3538            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
3539
3540        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())
3541
    def _parse_cast(self, strict: bool) -> exp.Expression:
        """Parse CAST(expr AS type); `strict` selects Cast over TryCast."""
        this = self._parse_conjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type') form: the target type is a string literal.
                return self.expression(
                    exp.CastToStrType, this=this, expression=self._parse_string()
                )
            else:
                self.raise_error("Expected AS after CAST")

        to = self._parse_types()

        if not to:
            self.raise_error("Expected TYPE after CAST")
        elif to.this == exp.DataType.Type.CHAR:
            # CAST(x AS CHAR CHARACTER SET cs)
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3562
    def _parse_string_agg(self) -> exp.Expression:
        """Parse STRING_AGG / GROUP_CONCAT style arguments into a GroupConcat node."""
        expression: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            args = self._parse_csv(self._parse_conjunction)
            expression = self.expression(exp.Distinct, expressions=[seq_get(args, 0)])
        else:
            args = self._parse_csv(self._parse_conjunction)
            expression = seq_get(args, 0)

        index = self._index
        if not self._match(TokenType.R_PAREN):
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            order = self._parse_order(this=expression)
            return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match(TokenType.WITHIN_GROUP):
            self._retreat(index)
            this = exp.GroupConcat.from_arg_list(args)
            self.validate_expression(this, args)
            return this

        self._match_l_paren()  # The corresponding match_r_paren will be called in parse_function (caller)
        order = self._parse_order(this=expression)
        return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1))
3591
3592    def _parse_convert(self, strict: bool) -> t.Optional[exp.Expression]:
3593        to: t.Optional[exp.Expression]
3594        this = self._parse_bitwise()
3595
3596        if self._match(TokenType.USING):
3597            to = self.expression(exp.CharacterSet, this=self._parse_var())
3598        elif self._match(TokenType.COMMA):
3599            to = self._parse_bitwise()
3600        else:
3601            to = None
3602
3603        # Swap the argument order if needed to produce the correct AST
3604        if self.CONVERT_TYPE_FIRST:
3605            this, to = to, this
3606
3607        return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to)
3608
    def _parse_decode(self) -> t.Optional[exp.Expression]:
        """
        There are generally two variants of the DECODE function:

        - DECODE(bin, charset)
        - DECODE(expression, search, result [, search, result] ... [, default])

        The second variant will always be parsed into a CASE expression. Note that NULL
        needs special treatment, since we need to explicitly check for it with `IS NULL`,
        instead of relying on pattern matching.
        """
        args = self._parse_csv(self._parse_conjunction)

        if len(args) < 3:
            # First variant: DECODE(bin, charset).
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        expression, *expressions = args
        if not expression:
            return None

        ifs = []
        # Pair up (search, result) arguments; a trailing unpaired arg is the default.
        for search, result in zip(expressions[::2], expressions[1::2]):
            if not search or not result:
                return None

            if isinstance(search, exp.Literal):
                ifs.append(
                    exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result)
                )
            elif isinstance(search, exp.Null):
                # NULL must be compared with IS NULL rather than equality.
                ifs.append(
                    exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result)
                )
            else:
                # Non-literal search value: it may be NULL at runtime, so also
                # treat NULL-matches-NULL as a hit.
                cond = exp.or_(
                    exp.EQ(this=expression.copy(), expression=search),
                    exp.and_(
                        exp.Is(this=expression.copy(), expression=exp.Null()),
                        exp.Is(this=search.copy(), expression=exp.Null()),
                        copy=False,
                    ),
                    copy=False,
                )
                ifs.append(exp.If(this=cond, true=result))

        return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None)
3655
3656    def _parse_json_key_value(self) -> t.Optional[exp.Expression]:
3657        self._match_text_seq("KEY")
3658        key = self._parse_field()
3659        self._match(TokenType.COLON)
3660        self._match_text_seq("VALUE")
3661        value = self._parse_field()
3662        if not key and not value:
3663            return None
3664        return self.expression(exp.JSONKeyValue, this=key, expression=value)
3665
    def _parse_json_object(self) -> exp.Expression:
        """Parse JSON_OBJECT arguments: key/value pairs plus optional modifiers
        (NULL handling, key uniqueness, RETURNING type, FORMAT JSON, ENCODING)."""
        expressions = self._parse_csv(self._parse_json_key_value)

        null_handling = None
        if self._match_text_seq("NULL", "ON", "NULL"):
            null_handling = "NULL ON NULL"
        elif self._match_text_seq("ABSENT", "ON", "NULL"):
            null_handling = "ABSENT ON NULL"

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        # Trailing KEYS keyword of WITH/WITHOUT UNIQUE KEYS.
        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_type()
        format_json = self._match_text_seq("FORMAT", "JSON")
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            format_json=format_json,
            encoding=encoding,
        )
3696
3697    def _parse_logarithm(self) -> exp.Expression:
3698        # Default argument order is base, expression
3699        args = self._parse_csv(self._parse_range)
3700
3701        if len(args) > 1:
3702            if not self.LOG_BASE_FIRST:
3703                args.reverse()
3704            return exp.Log.from_arg_list(args)
3705
3706        return self.expression(
3707            exp.Ln if self.LOG_DEFAULTS_TO_LN else exp.Log, this=seq_get(args, 0)
3708        )
3709
3710    def _parse_match_against(self) -> exp.Expression:
3711        expressions = self._parse_csv(self._parse_column)
3712
3713        self._match_text_seq(")", "AGAINST", "(")
3714
3715        this = self._parse_string()
3716
3717        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
3718            modifier = "IN NATURAL LANGUAGE MODE"
3719            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3720                modifier = f"{modifier} WITH QUERY EXPANSION"
3721        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
3722            modifier = "IN BOOLEAN MODE"
3723        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
3724            modifier = "WITH QUERY EXPANSION"
3725        else:
3726            modifier = None
3727
3728        return self.expression(
3729            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
3730        )
3731
3732    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.Expression:
        """Parse OPENJSON(expr [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.Expression:
            # One entry of the WITH clause: name type [path] [AS JSON].
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)
3752
    def _parse_position(self, haystack_first: bool = False) -> exp.Expression:
        """Parse POSITION/LOCATE-style arguments into a StrPosition node.

        Args:
            haystack_first: if True, comma-form arguments are (haystack, needle)
                rather than (needle, haystack).
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            needle = seq_get(args, 0)
            haystack = seq_get(args, 1)

        this = exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))

        self.validate_expression(this, args)

        return this
3773
3774    def _parse_join_hint(self, func_name: str) -> exp.Expression:
3775        args = self._parse_csv(self._parse_table)
3776        return exp.JoinHint(this=func_name.upper(), expressions=args)
3777
    def _parse_substring(self) -> exp.Expression:
        """Parse SUBSTRING arguments, including the FROM/FOR keyword form."""
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                args.append(self._parse_bitwise())

        this = exp.Substring.from_arg_list(args)
        self.validate_expression(this, args)

        return this
3793
    def _parse_trim(self) -> exp.Expression:
        """Parse TRIM([position] [chars FROM] expr [, chars] [COLLATE c])."""
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None

        if self._match_set(self.TRIM_TYPES):
            position = self._prev.text.upper()

        expression = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # Two-argument form: what was parsed first is the trim characters.
            this = self._parse_bitwise()
        else:
            # Single-argument form: the first expression is the trim target.
            this = expression
            expression = None

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim,
            this=this,
            position=position,
            expression=expression,
            collation=collation,
        )
3821
3822    def _parse_window_clause(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
3823        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)
3824
3825    def _parse_named_window(self) -> t.Optional[exp.Expression]:
3826        return self._parse_window(self._parse_id_var(), alias=True)
3827
3828    def _parse_respect_or_ignore_nulls(
3829        self, this: t.Optional[exp.Expression]
3830    ) -> t.Optional[exp.Expression]:
3831        if self._match(TokenType.IGNORE_NULLS):
3832            return self.expression(exp.IgnoreNulls, this=this)
3833        if self._match(TokenType.RESPECT_NULLS):
3834            return self.expression(exp.RespectNulls, this=this)
3835        return this
3836
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes around `this` (FILTER, WITHIN GROUP,
        IGNORE/RESPECT NULLS, OVER (...)); with ``alias=True``, parse a named
        WINDOW-clause definition instead of an OVER keyword."""
        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            this = self.expression(exp.Filter, this=this, expression=self._parse_where())
            self._match_r_paren()

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match(TokenType.WITHIN_GROUP):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name form (no inline specification).
            return self.expression(
                exp.Window, this=this, alias=self._parse_id_var(False), over=over
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition = self._parse_partition_by()
        order = self._parse_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # Window frame: ROWS/RANGE [BETWEEN] <start> [AND <end>].
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        return self.expression(
            exp.Window,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )
3919
    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one window-frame bound: its value (UNBOUNDED, CURRENT ROW, or an
        expression) and its side (PRECEDING/FOLLOWING), either of which may be absent."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                self._match_set((TokenType.UNBOUNDED, TokenType.CURRENT_ROW)) and self._prev.text
            )
            or self._parse_bitwise(),
            "side": self._match_set((TokenType.PRECEDING, TokenType.FOLLOWING)) and self._prev.text,
        }
3930
3931    def _parse_alias(
3932        self, this: t.Optional[exp.Expression], explicit: bool = False
3933    ) -> t.Optional[exp.Expression]:
3934        any_token = self._match(TokenType.ALIAS)
3935
3936        if explicit and not any_token:
3937            return this
3938
3939        if self._match(TokenType.L_PAREN):
3940            aliases = self.expression(
3941                exp.Aliases,
3942                this=this,
3943                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
3944            )
3945            self._match_r_paren(aliases)
3946            return aliases
3947
3948        alias = self._parse_id_var(any_token)
3949
3950        if alias:
3951            return self.expression(exp.Alias, this=this, alias=alias)
3952
3953        return this
3954
3955    def _parse_id_var(
3956        self,
3957        any_token: bool = True,
3958        tokens: t.Optional[t.Collection[TokenType]] = None,
3959        prefix_tokens: t.Optional[t.Collection[TokenType]] = None,
3960    ) -> t.Optional[exp.Expression]:
3961        identifier = self._parse_identifier()
3962
3963        if identifier:
3964            return identifier
3965
3966        prefix = ""
3967
3968        if prefix_tokens:
3969            while self._match_set(prefix_tokens):
3970                prefix += self._prev.text
3971
3972        if (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS):
3973            quoted = self._prev.token_type == TokenType.STRING
3974            return exp.Identifier(this=prefix + self._prev.text, quoted=quoted)
3975
3976        return None
3977
3978    def _parse_string(self) -> t.Optional[exp.Expression]:
3979        if self._match(TokenType.STRING):
3980            return self.PRIMARY_PARSERS[TokenType.STRING](self, self._prev)
3981        return self._parse_placeholder()
3982
3983    def _parse_string_as_identifier(self) -> t.Optional[exp.Expression]:
3984        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
3985
3986    def _parse_number(self) -> t.Optional[exp.Expression]:
3987        if self._match(TokenType.NUMBER):
3988            return self.PRIMARY_PARSERS[TokenType.NUMBER](self, self._prev)
3989        return self._parse_placeholder()
3990
3991    def _parse_identifier(self) -> t.Optional[exp.Expression]:
3992        if self._match(TokenType.IDENTIFIER):
3993            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
3994        return self._parse_placeholder()
3995
3996    def _parse_var(
3997        self, any_token: bool = False, tokens: t.Optional[t.Collection[TokenType]] = None
3998    ) -> t.Optional[exp.Expression]:
3999        if (
4000            (any_token and self._advance_any())
4001            or self._match(TokenType.VAR)
4002            or (self._match_set(tokens) if tokens else False)
4003        ):
4004            return self.expression(exp.Var, this=self._prev.text)
4005        return self._parse_placeholder()
4006
4007    def _advance_any(self) -> t.Optional[Token]:
4008        if self._curr and self._curr.token_type not in self.RESERVED_KEYWORDS:
4009            self._advance()
4010            return self._prev
4011        return None
4012
4013    def _parse_var_or_string(self) -> t.Optional[exp.Expression]:
4014        return self._parse_var() or self._parse_string()
4015
4016    def _parse_null(self) -> t.Optional[exp.Expression]:
4017        if self._match(TokenType.NULL):
4018            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
4019        return None
4020
4021    def _parse_boolean(self) -> t.Optional[exp.Expression]:
4022        if self._match(TokenType.TRUE):
4023            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
4024        if self._match(TokenType.FALSE):
4025            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
4026        return None
4027
4028    def _parse_star(self) -> t.Optional[exp.Expression]:
4029        if self._match(TokenType.STAR):
4030            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
4031        return None
4032
4033    def _parse_parameter(self) -> exp.Expression:
4034        wrapped = self._match(TokenType.L_BRACE)
4035        this = self._parse_var() or self._parse_identifier() or self._parse_primary()
4036        self._match(TokenType.R_BRACE)
4037        return self.expression(exp.Parameter, this=this, wrapped=wrapped)
4038
4039    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
4040        if self._match_set(self.PLACEHOLDER_PARSERS):
4041            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
4042            if placeholder:
4043                return placeholder
4044            self._advance(-1)
4045        return None
4046
4047    def _parse_except(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4048        if not self._match(TokenType.EXCEPT):
4049            return None
4050        if self._match(TokenType.L_PAREN, advance=False):
4051            return self._parse_wrapped_csv(self._parse_column)
4052        return self._parse_csv(self._parse_column)
4053
4054    def _parse_replace(self) -> t.Optional[t.List[t.Optional[exp.Expression]]]:
4055        if not self._match(TokenType.REPLACE):
4056            return None
4057        if self._match(TokenType.L_PAREN, advance=False):
4058            return self._parse_wrapped_csv(self._parse_expression)
4059        return self._parse_csv(self._parse_expression)
4060
    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a `sep`-separated list of items using `parse_method`.

        `None` results are dropped from the returned list. Comments captured
        while matching a separator are attached to the item parsed just
        before that separator.
        """
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Comments captured by the separator belong to the preceding item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items
4074
4075    def _parse_tokens(
4076        self, parse_method: t.Callable, expressions: t.Dict
4077    ) -> t.Optional[exp.Expression]:
4078        this = parse_method()
4079
4080        while self._match_set(expressions):
4081            this = self.expression(
4082                expressions[self._prev.token_type],
4083                this=this,
4084                comments=self._prev_comments,
4085                expression=parse_method(),
4086            )
4087
4088        return this
4089
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
4092
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse a parenthesized `sep`-separated list using `parse_method`.

        With `optional=True` the surrounding parentheses may be omitted.
        """
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )
4099
4100    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
4101        wrapped = self._match(TokenType.L_PAREN)
4102        if not wrapped and not optional:
4103            self.raise_error("Expecting (")
4104        parse_result = parse_method()
4105        if wrapped:
4106            self._match_r_paren()
4107        return parse_result
4108
4109    def _parse_select_or_expression(self) -> t.Optional[exp.Expression]:
4110        return self._parse_select() or self._parse_set_operations(self._parse_expression())
4111
4112    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
4113        return self._parse_set_operations(
4114            self._parse_select(nested=True, parse_subquery_alias=False)
4115        )
4116
4117    def _parse_transaction(self) -> exp.Expression:
4118        this = None
4119        if self._match_texts(self.TRANSACTION_KIND):
4120            this = self._prev.text
4121
4122        self._match_texts({"TRANSACTION", "WORK"})
4123
4124        modes = []
4125        while True:
4126            mode = []
4127            while self._match(TokenType.VAR):
4128                mode.append(self._prev.text)
4129
4130            if mode:
4131                modes.append(" ".join(mode))
4132            if not self._match(TokenType.COMMA):
4133                break
4134
4135        return self.expression(exp.Transaction, this=this, modes=modes)
4136
4137    def _parse_commit_or_rollback(self) -> exp.Expression:
4138        chain = None
4139        savepoint = None
4140        is_rollback = self._prev.token_type == TokenType.ROLLBACK
4141
4142        self._match_texts({"TRANSACTION", "WORK"})
4143
4144        if self._match_text_seq("TO"):
4145            self._match_text_seq("SAVEPOINT")
4146            savepoint = self._parse_id_var()
4147
4148        if self._match(TokenType.AND):
4149            chain = not self._match_text_seq("NO")
4150            self._match_text_seq("CHAIN")
4151
4152        if is_rollback:
4153            return self.expression(exp.Rollback, savepoint=savepoint)
4154        return self.expression(exp.Commit, chain=chain)
4155
    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        """Parse `ADD [COLUMN] [IF NOT EXISTS] <column def> [FIRST | AFTER col]`."""
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_column_def(self._parse_field(any_token=True))

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression
4176
4177    def _parse_drop_column(self) -> t.Optional[exp.Expression]:
4178        drop = self._match(TokenType.DROP) and self._parse_drop()
4179        if drop and not isinstance(drop, exp.Command):
4180            drop.set("kind", drop.args.get("kind", "COLUMN"))
4181        return drop
4182
4183    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
4184    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.Expression:
4185        return self.expression(
4186            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
4187        )
4188
    def _parse_add_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following `ALTER TABLE ... ADD`.

        Handles named CHECK constraints as well as FOREIGN KEY / PRIMARY KEY
        definitions; `self._prev` is the token that triggered this parser.
        """
        this = None
        kind = self._prev.token_type

        if kind == TokenType.CONSTRAINT:
            this = self._parse_id_var()

            if self._match_text_seq("CHECK"):
                expression = self._parse_wrapped(self._parse_conjunction)
                enforced = self._match_text_seq("ENFORCED")

                return self.expression(
                    exp.AddConstraint, this=this, expression=expression, enforced=enforced
                )

        # The key kind may be the triggering token itself (unnamed constraint)
        # or follow a CONSTRAINT <name> prefix.
        if kind == TokenType.FOREIGN_KEY or self._match(TokenType.FOREIGN_KEY):
            expression = self._parse_foreign_key()
        elif kind == TokenType.PRIMARY_KEY or self._match(TokenType.PRIMARY_KEY):
            expression = self._parse_primary_key()
        else:
            expression = None

        return self.expression(exp.AddConstraint, this=this, expression=expression)
4212
4213    def _parse_alter_table_add(self) -> t.List[t.Optional[exp.Expression]]:
4214        index = self._index - 1
4215
4216        if self._match_set(self.ADD_CONSTRAINT_TOKENS):
4217            return self._parse_csv(self._parse_add_constraint)
4218
4219        self._retreat(index)
4220        return self._parse_csv(self._parse_add_column)
4221
4222    def _parse_alter_table_alter(self) -> exp.Expression:
4223        self._match(TokenType.COLUMN)
4224        column = self._parse_field(any_token=True)
4225
4226        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
4227            return self.expression(exp.AlterColumn, this=column, drop=True)
4228        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
4229            return self.expression(exp.AlterColumn, this=column, default=self._parse_conjunction())
4230
4231        self._match_text_seq("SET", "DATA")
4232        return self.expression(
4233            exp.AlterColumn,
4234            this=column,
4235            dtype=self._match_text_seq("TYPE") and self._parse_types(),
4236            collate=self._match(TokenType.COLLATE) and self._parse_term(),
4237            using=self._match(TokenType.USING) and self._parse_conjunction(),
4238        )
4239
4240    def _parse_alter_table_drop(self) -> t.List[t.Optional[exp.Expression]]:
4241        index = self._index - 1
4242
4243        partition_exists = self._parse_exists()
4244        if self._match(TokenType.PARTITION, advance=False):
4245            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
4246
4247        self._retreat(index)
4248        return self._parse_csv(self._parse_drop_column)
4249
4250    def _parse_alter_table_rename(self) -> exp.Expression:
4251        self._match_text_seq("TO")
4252        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))
4253
    def _parse_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE statement.

        Falls back to an opaque Command when the statement is not
        `ALTER TABLE`, when no action parser is registered for the action
        keyword, or when tokens remain after the registered parser finished.
        """
        start = self._prev

        if not self._match(TokenType.TABLE):
            return self._parse_as_command(start)

        exists = self._parse_exists()
        this = self._parse_table(schema=True)

        if self._next:
            self._advance()
        # The action keyword (ADD, DROP, ALTER, RENAME, ...) selects the parser.
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None

        if parser:
            actions = ensure_list(parser(self))

            # Only build an AlterTable node if the whole statement was consumed.
            if not self._curr:
                return self.expression(
                    exp.AlterTable,
                    this=this,
                    exists=exists,
                    actions=actions,
                )
        return self._parse_as_command(start)
4278
    def _parse_merge(self) -> exp.Expression:
        """Parse a MERGE statement: INTO target USING source ON cond WHEN ... ."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_conjunction()

        whens = []
        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is True for BY SOURCE, False for BY TARGET or no BY clause.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_conjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                _this = self._parse_star()
                if _this:
                    # `INSERT *` shorthand.
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=_this)
                else:
                    # `INSERT (cols) VALUES (vals)`.
                    then = self.expression(
                        exp.Insert,
                        this=self._parse_value(),
                        expression=self._match(TokenType.VALUES) and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    # `UPDATE *` shorthand.
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = None

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=whens,
        )
4344
4345    def _parse_show(self) -> t.Optional[exp.Expression]:
4346        parser = self._find_parser(self.SHOW_PARSERS, self._show_trie)  # type: ignore
4347        if parser:
4348            return parser(self)
4349        self._advance()
4350        return self.expression(exp.Show, this=self._prev.text.upper())
4351
    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a `name = value` / `name TO value` SET item.

        Returns None (restoring the parser position) if no assignment
        operator follows the left-hand side.
        """
        index = self._index

        if kind in {"GLOBAL", "SESSION"} and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_id_var()

        if not self._match_texts(("=", "TO")):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        this = self.expression(
            exp.EQ,
            this=left,
            expression=right,
        )

        return self.expression(
            exp.SetItem,
            this=this,
            kind=kind,
        )
4378
    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse `SET [GLOBAL] TRANSACTION <characteristic, ...>`."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            # "global" is a Python keyword, so it must be passed via a dict.
            **{"global": global_},  # type: ignore
        )
4390
4391    def _parse_set_item(self) -> t.Optional[exp.Expression]:
4392        parser = self._find_parser(self.SET_PARSERS, self._set_trie)  # type: ignore
4393        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
4394
    def _parse_set(self) -> exp.Expression:
        """Parse a SET statement, falling back to a raw Command on leftovers."""
        index = self._index
        set_ = self.expression(exp.Set, expressions=self._parse_csv(self._parse_set_item))

        if self._curr:
            # Leftover tokens mean the SET was not fully parsed: rewind and
            # treat the whole statement as an opaque command.
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_
4404
4405    def _parse_var_from_options(self, options: t.Collection[str]) -> t.Optional[exp.Expression]:
4406        for option in options:
4407            if self._match_text_seq(*option.split(" ")):
4408                return exp.Var(this=option)
4409        return None
4410
4411    def _parse_as_command(self, start: Token) -> exp.Command:
4412        while self._curr:
4413            self._advance()
4414        text = self._find_sql(start, self._prev)
4415        size = len(start.text)
4416        return exp.Command(this=text[:size], expression=text[size:])
4417
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Find the parser keyed by the longest matching keyword sequence.

        Walks `trie` using upcoming token texts. On a full key match the
        corresponding entry of `parsers` is returned with the matched tokens
        consumed; otherwise the position is restored and None is returned.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)
            self._advance()
            result, trie = in_trie(trie, key)
            if result == 0:
                # Dead end: no registered key starts with this prefix.
                break
            if result == 2:
                # Full key matched.
                subparser = parsers[" ".join(this)]
                return subparser
        self._retreat(index)
        return None
4440
    def _match(self, token_type, advance=True, expression=None):
        """Consume the current token if it has `token_type`.

        Returns True on a match (advancing unless `advance` is False and
        attaching any pending comments to `expression`), otherwise None.
        """
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None
4452
4453    def _match_set(self, types, advance=True):
4454        if not self._curr:
4455            return None
4456
4457        if self._curr.token_type in types:
4458            if advance:
4459                self._advance()
4460            return True
4461
4462        return None
4463
4464    def _match_pair(self, token_type_a, token_type_b, advance=True):
4465        if not self._curr or not self._next:
4466            return None
4467
4468        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
4469            if advance:
4470                self._advance(2)
4471            return True
4472
4473        return None
4474
    def _match_l_paren(self, expression=None):
        """Require a `(` token, raising a parse error if it is missing."""
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")
4478
    def _match_r_paren(self, expression=None):
        """Require a `)` token, raising a parse error if it is missing."""
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")
4482
4483    def _match_texts(self, texts, advance=True):
4484        if self._curr and self._curr.text.upper() in texts:
4485            if advance:
4486                self._advance()
4487            return True
4488        return False
4489
    def _match_text_seq(self, *texts, advance=True):
        """Consume a sequence of tokens whose texts match `texts` (case-insensitive).

        If the full sequence does not match, the parser position is restored
        and False is returned. With `advance=False` the position is restored
        even on success, making this a pure lookahead.
        """
        index = self._index
        for text in texts:
            if self._curr and self._curr.text.upper() == text:
                self._advance()
            else:
                self._retreat(index)
                return False

        if not advance:
            self._retreat(index)

        return True
4503
    def _replace_columns_with_dots(self, this):
        """Recursively rewrite Column/Identifier nodes into Dot/Var chains.

        Used where dotted names denote nested field access rather than
        table-qualified columns.
        """
        if isinstance(this, exp.Dot):
            exp.replace_children(this, self._replace_columns_with_dots)
        elif isinstance(this, exp.Column):
            exp.replace_children(this, self._replace_columns_with_dots)
            table = this.args.get("table")
            # A table-qualified column becomes table.column as a Dot chain;
            # an unqualified column becomes a bare Var.
            this = (
                self.expression(exp.Dot, this=table, expression=this.this)
                if table
                else self.expression(exp.Var, this=this.name)
            )
        elif isinstance(this, exp.Identifier):
            this = self.expression(exp.Var, this=this.name)
        return this
4518
    def _replace_lambda(self, node, lambda_variables):
        """Rewrite references to lambda parameters so they are not treated as columns.

        For each Column whose root name is in `lambda_variables`, the column
        (or the outermost Dot chain containing it) is replaced by the bare
        identifier / dot expression.
        """
        for column in node.find_all(exp.Column):
            if column.parts[0].name in lambda_variables:
                dot_or_id = column.to_dot() if column.table else column.this
                parent = column.parent

                # Walk up to the top of any enclosing Dot chain and replace there.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    # Not inside a Dot chain: replace the column itself (or the
                    # whole node if the column IS the node being rewritten).
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

Parser consumes a list of tokens produced by the sqlglot.tokens.Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: the desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 50.
  • index_offset: Index offset for arrays, e.g. ARRAY[0] vs ARRAY[1] as the head of a list. Default: 0
  • alias_post_tablesample: If the table alias comes after tablesample. Default: False
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
  • null_ordering: Indicates the default null ordering method to use if not explicitly set. Options are "nulls_are_small", "nulls_are_large", "nulls_are_last". Default: "nulls_are_small"
Parser( error_level: Optional[sqlglot.errors.ErrorLevel] = None, error_message_context: int = 100, index_offset: int = 0, unnest_column_only: bool = False, alias_post_tablesample: bool = False, max_errors: int = 3, null_ordering: Optional[str] = None)
809    def __init__(
810        self,
811        error_level: t.Optional[ErrorLevel] = None,
812        error_message_context: int = 100,
813        index_offset: int = 0,
814        unnest_column_only: bool = False,
815        alias_post_tablesample: bool = False,
816        max_errors: int = 3,
817        null_ordering: t.Optional[str] = None,
818    ):
819        self.error_level = error_level or ErrorLevel.IMMEDIATE
820        self.error_message_context = error_message_context
821        self.index_offset = index_offset
822        self.unnest_column_only = unnest_column_only
823        self.alias_post_tablesample = alias_post_tablesample
824        self.max_errors = max_errors
825        self.null_ordering = null_ordering
826        self.reset()
def reset(self):
828    def reset(self):
829        self.sql = ""
830        self.errors = []
831        self._tokens = []
832        self._index = 0
833        self._curr = None
834        self._next = None
835        self._prev = None
836        self._prev_comments = None
def parse( self, raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
838    def parse(
839        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
840    ) -> t.List[t.Optional[exp.Expression]]:
841        """
842        Parses a list of tokens and returns a list of syntax trees, one tree
843        per parsed SQL statement.
844
845        Args:
846            raw_tokens: the list of tokens.
847            sql: the original SQL string, used to produce helpful debug messages.
848
849        Returns:
850            The list of syntax trees.
851        """
852        return self._parse(
853            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
854        )

Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.

Arguments:
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The list of syntax trees.

def parse_into( self, expression_types: Union[str, Type[sqlglot.expressions.Expression], Collection[Union[str, Type[sqlglot.expressions.Expression]]]], raw_tokens: List[sqlglot.tokens.Token], sql: Optional[str] = None) -> List[Optional[sqlglot.expressions.Expression]]:
856    def parse_into(
857        self,
858        expression_types: exp.IntoType,
859        raw_tokens: t.List[Token],
860        sql: t.Optional[str] = None,
861    ) -> t.List[t.Optional[exp.Expression]]:
862        """
863        Parses a list of tokens into a given Expression type. If a collection of Expression
864        types is given instead, this method will try to parse the token list into each one
865        of them, stopping at the first for which the parsing succeeds.
866
867        Args:
868            expression_types: the expression type(s) to try and parse the token list into.
869            raw_tokens: the list of tokens.
870            sql: the original SQL string, used to produce helpful debug messages.
871
872        Returns:
873            The target Expression.
874        """
875        errors = []
876        for expression_type in ensure_collection(expression_types):
877            parser = self.EXPRESSION_PARSERS.get(expression_type)
878            if not parser:
879                raise TypeError(f"No parser registered for {expression_type}")
880            try:
881                return self._parse(parser, raw_tokens, sql)
882            except ParseError as e:
883                e.errors[0]["into_expression"] = expression_type
884                errors.append(e)
885        raise ParseError(
886            f"Failed to parse into {expression_types}",
887            errors=merge_errors(errors),
888        ) from errors[-1]

Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.

Arguments:
  • expression_types: the expression type(s) to try and parse the token list into.
  • raw_tokens: the list of tokens.
  • sql: the original SQL string, used to produce helpful debug messages.
Returns:

The target Expression.

def check_errors(self) -> None:
924    def check_errors(self) -> None:
925        """
926        Logs or raises any found errors, depending on the chosen error level setting.
927        """
928        if self.error_level == ErrorLevel.WARN:
929            for error in self.errors:
930                logger.error(str(error))
931        elif self.error_level == ErrorLevel.RAISE and self.errors:
932            raise ParseError(
933                concat_messages(self.errors, self.max_errors),
934                errors=merge_errors(self.errors),
935            )

Logs or raises any found errors, depending on the chosen error level setting.

def raise_error(self, message: str, token: Optional[sqlglot.tokens.Token] = None) -> None:
937    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
938        """
939        Appends an error in the list of recorded errors or raises it, depending on the chosen
940        error level setting.
941        """
942        token = token or self._curr or self._prev or Token.string("")
943        start = token.start
944        end = token.end
945        start_context = self.sql[max(start - self.error_message_context, 0) : start]
946        highlight = self.sql[start:end]
947        end_context = self.sql[end : end + self.error_message_context]
948
949        error = ParseError.new(
950            f"{message}. Line {token.line}, Col: {token.col}.\n"
951            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
952            description=message,
953            line=token.line,
954            col=token.col,
955            start_context=start_context,
956            highlight=highlight,
957            end_context=end_context,
958        )
959
960        if self.error_level == ErrorLevel.IMMEDIATE:
961            raise error
962
963        self.errors.append(error)

Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.

def expression( self, exp_class: Type[~E], comments: Optional[List[str]] = None, **kwargs) -> ~E:
965    def expression(
966        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
967    ) -> E:
968        """
969        Creates a new, validated Expression.
970
971        Args:
972            exp_class: the expression class to instantiate.
973            comments: an optional list of comments to attach to the expression.
974            kwargs: the arguments to set for the expression along with their respective values.
975
976        Returns:
977            The target expression.
978        """
979        instance = exp_class(**kwargs)
980        instance.add_comments(comments) if comments else self._add_comments(instance)
981        self.validate_expression(instance)
982        return instance

Creates a new, validated Expression.

Arguments:
  • exp_class: the expression class to instantiate.
  • comments: an optional list of comments to attach to the expression.
  • kwargs: the arguments to set for the expression along with their respective values.
Returns:

The target expression.

def validate_expression( self, expression: sqlglot.expressions.Expression, args: Optional[List] = None) -> None:
 989    def validate_expression(
 990        self, expression: exp.Expression, args: t.Optional[t.List] = None
 991    ) -> None:
 992        """
 993        Validates an already instantiated expression, making sure that all its mandatory arguments
 994        are set.
 995
 996        Args:
 997            expression: the expression to validate.
 998            args: an optional list of items that was used to instantiate the expression, if it's a Func.
 999        """
1000        if self.error_level == ErrorLevel.IGNORE:
1001            return
1002
1003        for error_message in expression.error_messages(args):
1004            self.raise_error(error_message)

Validates an already instantiated expression, making sure that all its mandatory arguments are set.

Arguments:
  • expression: the expression to validate.
  • args: an optional list of items that was used to instantiate the expression, if it's a Func.