sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
)
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        int_value = value is not None and is_int(value.name)

        if isinstance(value, exp.Literal):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return exp.cast(value, kind)

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if int_value:
                    return exp.UnixToTime(this=value, scale=seq_get(args, 1))
                if not is_float(value.this):
                    return build_formatted_time(exp.StrToTime, "snowflake")(args)

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))
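
# Illustrative note (not part of the original source): these builders let
# Snowflake-only helpers round-trip through a portable AST. For example,
# DIV0(a, b) parses to exp.If(b = 0, 0, a / b), which this dialect renders
# back as IFF(...), while dialects without an IFF-style function can fall
# back to a CASE expression. A REPL sketch, assuming a standard sqlglot install:
#
#     >>> import sqlglot
#     >>> sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="snowflake")[0]
#     'SELECT IFF(b = 0, 0, a / b)'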
def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for unnest in expression.find_all(exp.Unnest):
            if (
                isinstance(unnest.parent, (exp.From, exp.Join))
                and len(unnest.expressions) == 1
                and isinstance(unnest.expressions[0], exp.GenerateDateArray)
            ):
                generate_date_array = unnest.expressions[0]
                start = generate_date_array.args.get("start")
                end = generate_date_array.args.get("end")
                step = generate_date_array.args.get("step")

                if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
                    continue

                unit = step.args.get("unit")

                # We'll add the next sequence value to the starting date and project the result
                date_add = _build_date_time_add(exp.DateAdd)(
                    [unit, exp.cast("value", "int"), exp.cast(start, "date")]
                ).as_("value")

                # We use DATEDIFF to compute the number of sequence values needed
                number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
                    [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
                )

                unnest_alias = unnest.args.get("alias")
                if unnest_alias:
                    unnest_alias = unnest_alias.copy()

                unnest.set("expressions", [number_sequence])
                unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))

    return expression
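
# Illustrative note (not part of the original source): the rewrite above turns
# UNNEST(GENERATE_DATE_ARRAY(start, end, INTERVAL 1 <unit>)) from dialects such
# as BigQuery into Snowflake-friendly SQL: ARRAY_GENERATE_RANGE produces the
# integers 0..DATEDIFF(<unit>, start, end), and a projected DATEADD shifts each
# value onto the starting date, yielding an equivalent date sequence.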
class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
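
    # Illustrative note (not part of the original source): with this override,
    # the DUAL pseudo-table should stay unquoted even when identifiers are
    # forced. A REPL sketch, assuming a standard sqlglot install:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT 1 FROM DUAL", read="snowflake").sql("snowflake", identify=True)
    #     'SELECT 1 FROM DUAL'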
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }
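
        # Illustrative note (not part of the original source): SHOW_PARSERS
        # routes statements like SHOW TERSE TABLES IN SCHEMA db.s into exp.Show
        # nodes instead of opaque commands, so they can be inspected and
        # re-rendered, e.g. via
        # `parse_one("SHOW TERSE TABLES IN SCHEMA db.s", read="snowflake")`.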
_show_parser("IMPORTED KEYS"), 406 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 407 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 408 "SEQUENCES": _show_parser("SEQUENCES"), 409 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 410 "COLUMNS": _show_parser("COLUMNS"), 411 "USERS": _show_parser("USERS"), 412 "TERSE USERS": _show_parser("USERS"), 413 } 414 415 CONSTRAINT_PARSERS = { 416 **parser.Parser.CONSTRAINT_PARSERS, 417 "WITH": lambda self: self._parse_with_constraint(), 418 "MASKING": lambda self: self._parse_with_constraint(), 419 "PROJECTION": lambda self: self._parse_with_constraint(), 420 "TAG": lambda self: self._parse_with_constraint(), 421 } 422 423 STAGED_FILE_SINGLE_TOKENS = { 424 TokenType.DOT, 425 TokenType.MOD, 426 TokenType.SLASH, 427 } 428 429 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 430 431 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 432 433 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 434 435 LAMBDAS = { 436 **parser.Parser.LAMBDAS, 437 TokenType.ARROW: lambda self, expressions: self.expression( 438 exp.Lambda, 439 this=self._replace_lambda( 440 self._parse_assignment(), 441 expressions, 442 ), 443 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 444 ), 445 } 446 447 def _negate_range( 448 self, this: t.Optional[exp.Expression] = None 449 ) -> t.Optional[exp.Expression]: 450 if not this: 451 return this 452 453 query = this.args.get("query") 454 if isinstance(this, exp.In) and isinstance(query, exp.Query): 455 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 456 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 457 # which can produce different results (most likely a SnowFlake bug). 
        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self.expression(
                    exp.TagColumnConstraint,
                    expressions=self._parse_wrapped_csv(self._parse_property),
                )

            return None

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral
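
        # Illustrative note (not part of the original source): LATERAL FLATTEN(...)
        # calls are given Snowflake's implicit output columns (SEQ, KEY, PATH,
        # INDEX, VALUE, THIS) as table-alias columns when none are supplied
        # explicitly, so downstream column resolution can see them.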
        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)

        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )
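
        # Illustrative note (not part of the original source): staged-file
        # references such as @mystage/path (FILE_FORMAT => 'fmt', PATTERN => '.*')
        # parse into exp.Table nodes carrying `format` and `pattern` args via
        # _parse_table_parts above, rather than failing as unknown syntax.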
        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
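
    # Illustrative note (not part of the original source): the tokenizer above
    # accepts Snowflake-specific lexemes, e.g. $$...$$ raw strings, `//` line
    # comments, and `$`-prefixed parameters, so statements that use them
    # tokenize cleanly before parsing.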
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _unnest_generate_date_array,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }
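
        # Illustrative note (not part of the original source): with the mappings
        # above, structured types collapse to Snowflake's generic OBJECT, e.g. a
        # BigQuery STRUCT<a INT> column would be rendered simply as OBJECT
        # (see also datatype_sql and _flatten_structured_types_unless_iceberg).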
        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"
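
        # Illustrative note (not part of the original source): a REPL sketch of
        # the UNNEST rewrite above, assuming a standard sqlglot install:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.transpile("SELECT * FROM UNNEST([1, 2])", read="duckdb", write="snowflake")[0]
        #     'SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS _u(seq, key, path, index, value, this)'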
        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
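
        # Illustrative note (not part of the original source): struct_sql above
        # renders struct literals as OBJECT_CONSTRUCT key/value pairs, e.g.
        # DuckDB's {'a': 1} would become OBJECT_CONSTRUCT('a', 1) in Snowflake,
        # with positional fields falling back to generated '_0', '_1', ... keys.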
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            if expression.args.get("weight") or expression.args.get("accuracy"):
                self.unsupported(
                    "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake"
                )

            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"
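
A minimal usage sketch (not part of the module above), assuming a standard sqlglot installation: the dialect registers itself under the name "snowflake", so it is exercised through sqlglot's public entry points rather than instantiated directly.

import sqlglot
from sqlglot import exp

# Parse Snowflake SQL into an AST; IFF(...) comes back as an exp.If node
# via the Parser.FUNCTIONS table above.
ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM tbl", read="snowflake")
assert isinstance(ast.find(exp.If), exp.If)

# Transpile a Snowflake-only helper for another engine; DIV0 was parsed into
# a portable conditional, so e.g. Postgres output falls back to a CASE expression.
print(sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="postgres")[0])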
1013 def generatedasidentitycolumnconstraint_sql( 1014 self, expression: exp.GeneratedAsIdentityColumnConstraint 1015 ) -> str: 1016 start = expression.args.get("start") 1017 start = f" START {start}" if start else "" 1018 increment = expression.args.get("increment") 1019 increment = f" INCREMENT {increment}" if increment else "" 1020 return f"AUTOINCREMENT{start}{increment}" 1021 1022 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1023 this = self.sql(expression, "this") 1024 return f"SWAP WITH {this}" 1025 1026 def cluster_sql(self, expression: exp.Cluster) -> str: 1027 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1028 1029 def struct_sql(self, expression: exp.Struct) -> str: 1030 keys = [] 1031 values = [] 1032 1033 for i, e in enumerate(expression.expressions): 1034 if isinstance(e, exp.PropertyEQ): 1035 keys.append( 1036 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1037 ) 1038 values.append(e.expression) 1039 else: 1040 keys.append(exp.Literal.string(f"_{i}")) 1041 values.append(e) 1042 1043 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1044 1045 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1046 if expression.args.get("weight") or expression.args.get("accuracy"): 1047 self.unsupported( 1048 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1049 ) 1050 1051 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1052 1053 def alterset_sql(self, expression: exp.AlterSet) -> str: 1054 exprs = self.expressions(expression, flat=True) 1055 exprs = f" {exprs}" if exprs else "" 1056 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1057 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1058 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1059 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1060 tag = self.expressions(expression, key="tag", flat=True) 1061 tag = f" TAG {tag}" if tag else "" 1062 1063 return f"SET{exprs}{file_format}{copy_options}{tag}"
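Taken together, the Tokenizer, Parser, and Generator above make up the Snowflake dialect. As a quick end-to-end sketch (standard sqlglot API; the normalized output in the comment is what the MINUS keyword mapping suggests, not a verbatim capture):

import sqlglot

# "MINUS" is tokenized as EXCEPT by this dialect, so it normalizes on a round trip
print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="snowflake")[0])
# expected: SELECT 1 EXCEPT SELECT 2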
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,

WITH y(c) AS (
  SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
  SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
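This rewrite happens when columns are qualified; a minimal sketch using the optimizer's qualify pass (standard sqlglot API):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
expression = sqlglot.parse_one(sql, read="snowflake")

# with PREFER_CTE_ALIAS_COLUMN, HAVING c resolves to the CTE alias of SUM(a)
print(qualify(expression, dialect="snowflake").sql(dialect="snowflake"))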
Associates this dialect's time formats with their equivalent Python strftime formats.
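For example, Snowflake's YYYY format token corresponds to Python's %Y:

from sqlglot.dialects.snowflake import Snowflake

print(Snowflake.TIME_MAPPING["YYYY"])  # %Y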
262 def quote_identifier(self, expression: E, identify: bool = True) -> E: 263 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 264 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 265 if ( 266 isinstance(expression, exp.Identifier) 267 and isinstance(expression.parent, exp.Table) 268 and expression.name.lower() == "dual" 269 ): 270 return expression # type: ignore 271 272 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
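A brief illustration of the DUAL carve-out, using throwaway identifiers:

from sqlglot import exp
from sqlglot.dialects.snowflake import Snowflake

dialect = Snowflake()

# "dual" inside a Table node is left unquoted so FROM DUAL keeps its special meaning
table = exp.to_table("dual")
print(dialect.quote_identifier(table.this).sql(dialect="snowflake"))  # dual

# an ordinary identifier is quoted as usual
print(dialect.quote_identifier(exp.to_identifier("my col")).sql(dialect="snowflake"))  # "my col"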
Mapping of an escaped sequence ("\n") to its unescaped version (a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
274 class Parser(parser.Parser): 275 IDENTIFY_PIVOT_STRINGS = True 276 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 277 COLON_IS_VARIANT_EXTRACT = True 278 279 ID_VAR_TOKENS = { 280 *parser.Parser.ID_VAR_TOKENS, 281 TokenType.MATCH_CONDITION, 282 } 283 284 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 285 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 286 287 FUNCTIONS = { 288 **parser.Parser.FUNCTIONS, 289 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 290 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 291 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 292 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 293 this=seq_get(args, 1), expression=seq_get(args, 0) 294 ), 295 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 296 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 297 start=seq_get(args, 0), 298 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 299 step=seq_get(args, 2), 300 ), 301 "BITXOR": binary_from_function(exp.BitwiseXor), 302 "BIT_XOR": binary_from_function(exp.BitwiseXor), 303 "BOOLXOR": binary_from_function(exp.Xor), 304 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 305 "DATE_TRUNC": _date_trunc_to_time, 306 "DATEADD": _build_date_time_add(exp.DateAdd), 307 "DATEDIFF": _build_datediff, 308 "DIV0": _build_if_from_div0, 309 "FLATTEN": exp.Explode.from_arg_list, 310 "GET_PATH": lambda args, dialect: exp.JSONExtract( 311 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 312 ), 313 "IFF": exp.If.from_arg_list, 314 "LAST_DAY": lambda args: exp.LastDay( 315 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 316 ), 317 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 318 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 319 "LISTAGG": exp.GroupConcat.from_arg_list, 320 "MEDIAN": lambda args: exp.PercentileCont( 321 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 322 ), 323 "NULLIFZERO": _build_if_from_nullifzero, 324 "OBJECT_CONSTRUCT": _build_object_construct, 325 "REGEXP_REPLACE": _build_regexp_replace, 326 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 327 "RLIKE": exp.RegexpLike.from_arg_list, 328 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 329 "TIMEADD": _build_date_time_add(exp.TimeAdd), 330 "TIMEDIFF": _build_datediff, 331 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 332 "TIMESTAMPDIFF": _build_datediff, 333 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 334 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 335 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 336 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 337 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 338 "TO_NUMBER": lambda args: exp.ToNumber( 339 this=seq_get(args, 0), 340 format=seq_get(args, 1), 341 precision=seq_get(args, 2), 342 scale=seq_get(args, 3), 343 ), 344 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 345 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 346 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 347 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 348 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 349 "TO_VARCHAR": exp.ToChar.from_arg_list, 350 "ZEROIFNULL": _build_if_from_zeroifnull, 351 } 352 353 
FUNCTION_PARSERS = { 354 **parser.Parser.FUNCTION_PARSERS, 355 "DATE_PART": lambda self: self._parse_date_part(), 356 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 357 } 358 FUNCTION_PARSERS.pop("TRIM") 359 360 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 361 362 RANGE_PARSERS = { 363 **parser.Parser.RANGE_PARSERS, 364 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 365 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 366 } 367 368 ALTER_PARSERS = { 369 **parser.Parser.ALTER_PARSERS, 370 "UNSET": lambda self: self.expression( 371 exp.Set, 372 tag=self._match_text_seq("TAG"), 373 expressions=self._parse_csv(self._parse_id_var), 374 unset=True, 375 ), 376 "SWAP": lambda self: self._parse_alter_table_swap(), 377 } 378 379 STATEMENT_PARSERS = { 380 **parser.Parser.STATEMENT_PARSERS, 381 TokenType.SHOW: lambda self: self._parse_show(), 382 } 383 384 PROPERTY_PARSERS = { 385 **parser.Parser.PROPERTY_PARSERS, 386 "LOCATION": lambda self: self._parse_location_property(), 387 } 388 389 TYPE_CONVERTERS = { 390 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 391 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 392 } 393 394 SHOW_PARSERS = { 395 "SCHEMAS": _show_parser("SCHEMAS"), 396 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 397 "OBJECTS": _show_parser("OBJECTS"), 398 "TERSE OBJECTS": _show_parser("OBJECTS"), 399 "TABLES": _show_parser("TABLES"), 400 "TERSE TABLES": _show_parser("TABLES"), 401 "VIEWS": _show_parser("VIEWS"), 402 "TERSE VIEWS": _show_parser("VIEWS"), 403 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 404 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 405 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 406 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 407 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 408 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 409 "SEQUENCES": _show_parser("SEQUENCES"), 410 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 411 "COLUMNS": _show_parser("COLUMNS"), 412 "USERS": _show_parser("USERS"), 413 "TERSE USERS": _show_parser("USERS"), 414 } 415 416 CONSTRAINT_PARSERS = { 417 **parser.Parser.CONSTRAINT_PARSERS, 418 "WITH": lambda self: self._parse_with_constraint(), 419 "MASKING": lambda self: self._parse_with_constraint(), 420 "PROJECTION": lambda self: self._parse_with_constraint(), 421 "TAG": lambda self: self._parse_with_constraint(), 422 } 423 424 STAGED_FILE_SINGLE_TOKENS = { 425 TokenType.DOT, 426 TokenType.MOD, 427 TokenType.SLASH, 428 } 429 430 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 431 432 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 433 434 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 435 436 LAMBDAS = { 437 **parser.Parser.LAMBDAS, 438 TokenType.ARROW: lambda self, expressions: self.expression( 439 exp.Lambda, 440 this=self._replace_lambda( 441 self._parse_assignment(), 442 expressions, 443 ), 444 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 445 ), 446 } 447 448 def _negate_range( 449 self, this: t.Optional[exp.Expression] = None 450 ) -> t.Optional[exp.Expression]: 451 if not this: 452 return this 453 454 query = this.args.get("query") 455 if isinstance(this, exp.In) and isinstance(query, exp.Query): 456 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 457 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 458 # which can 
produce different results (most likely a SnowFlake bug). 459 # 460 # https://docs.snowflake.com/en/sql-reference/functions/in 461 # Context: https://github.com/tobymao/sqlglot/issues/3890 462 return self.expression( 463 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 464 ) 465 466 return self.expression(exp.Not, this=this) 467 468 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 469 if self._prev.token_type != TokenType.WITH: 470 self._retreat(self._index - 1) 471 472 if self._match_text_seq("MASKING", "POLICY"): 473 policy = self._parse_column() 474 return self.expression( 475 exp.MaskingPolicyColumnConstraint, 476 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 477 expressions=self._match(TokenType.USING) 478 and self._parse_wrapped_csv(self._parse_id_var), 479 ) 480 if self._match_text_seq("PROJECTION", "POLICY"): 481 policy = self._parse_column() 482 return self.expression( 483 exp.ProjectionPolicyColumnConstraint, 484 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 485 ) 486 if self._match(TokenType.TAG): 487 return self.expression( 488 exp.TagColumnConstraint, 489 expressions=self._parse_wrapped_csv(self._parse_property), 490 ) 491 492 return None 493 494 def _parse_create(self) -> exp.Create | exp.Command: 495 expression = super()._parse_create() 496 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 497 # Replace the Table node with the enclosed Identifier 498 expression.this.replace(expression.this.this) 499 500 return expression 501 502 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 503 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 504 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 505 this = self._parse_var() or self._parse_type() 506 507 if not this: 508 return None 509 510 self._match(TokenType.COMMA) 511 expression = self._parse_bitwise() 512 this = map_date_part(this) 513 name = this.name.upper() 514 515 if name.startswith("EPOCH"): 516 if name == "EPOCH_MILLISECOND": 517 scale = 10**3 518 elif name == "EPOCH_MICROSECOND": 519 scale = 10**6 520 elif name == "EPOCH_NANOSECOND": 521 scale = 10**9 522 else: 523 scale = None 524 525 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 526 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 527 528 if scale: 529 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 530 531 return to_unix 532 533 return self.expression(exp.Extract, this=this, expression=expression) 534 535 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 536 if is_map: 537 # Keys are strings in Snowflake's objects, see also: 538 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 539 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 540 return self._parse_slice(self._parse_string()) 541 542 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 543 544 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 545 lateral = super()._parse_lateral() 546 if not lateral: 547 return lateral 548 549 if isinstance(lateral.this, exp.Explode): 550 table_alias = lateral.args.get("alias") 551 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 552 if table_alias and not table_alias.args.get("columns"): 553 table_alias.set("columns", columns) 554 elif not table_alias: 555 
exp.alias_(lateral, "_flattened", table=columns, copy=False) 556 557 return lateral 558 559 def _parse_table_parts( 560 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 561 ) -> exp.Table: 562 # https://docs.snowflake.com/en/user-guide/querying-stage 563 if self._match(TokenType.STRING, advance=False): 564 table = self._parse_string() 565 elif self._match_text_seq("@", advance=False): 566 table = self._parse_location_path() 567 else: 568 table = None 569 570 if table: 571 file_format = None 572 pattern = None 573 574 wrapped = self._match(TokenType.L_PAREN) 575 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 576 if self._match_text_seq("FILE_FORMAT", "=>"): 577 file_format = self._parse_string() or super()._parse_table_parts( 578 is_db_reference=is_db_reference 579 ) 580 elif self._match_text_seq("PATTERN", "=>"): 581 pattern = self._parse_string() 582 else: 583 break 584 585 self._match(TokenType.COMMA) 586 587 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 588 else: 589 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 590 591 return table 592 593 def _parse_id_var( 594 self, 595 any_token: bool = True, 596 tokens: t.Optional[t.Collection[TokenType]] = None, 597 ) -> t.Optional[exp.Expression]: 598 if self._match_text_seq("IDENTIFIER", "("): 599 identifier = ( 600 super()._parse_id_var(any_token=any_token, tokens=tokens) 601 or self._parse_string() 602 ) 603 self._match_r_paren() 604 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 605 606 return super()._parse_id_var(any_token=any_token, tokens=tokens) 607 608 def _parse_show_snowflake(self, this: str) -> exp.Show: 609 scope = None 610 scope_kind = None 611 612 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 613 # which is syntactically valid but has no effect on the output 614 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 615 616 history = self._match_text_seq("HISTORY") 617 618 like = self._parse_string() if self._match(TokenType.LIKE) else None 619 620 if self._match(TokenType.IN): 621 if self._match_text_seq("ACCOUNT"): 622 scope_kind = "ACCOUNT" 623 elif self._match_set(self.DB_CREATABLES): 624 scope_kind = self._prev.text.upper() 625 if self._curr: 626 scope = self._parse_table_parts() 627 elif self._curr: 628 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 629 scope = self._parse_table_parts() 630 631 return self.expression( 632 exp.Show, 633 **{ 634 "terse": terse, 635 "this": this, 636 "history": history, 637 "like": like, 638 "scope": scope, 639 "scope_kind": scope_kind, 640 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 641 "limit": self._parse_limit(), 642 "from": self._parse_string() if self._match(TokenType.FROM) else None, 643 }, 644 ) 645 646 def _parse_alter_table_swap(self) -> exp.SwapTable: 647 self._match_text_seq("WITH") 648 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 649 650 def _parse_location_property(self) -> exp.LocationProperty: 651 self._match(TokenType.EQ) 652 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 653 654 def _parse_file_location(self) -> t.Optional[exp.Expression]: 655 # Parse either a subquery or a staged file 656 return ( 657 self._parse_select(table=True, parse_subquery_alias=False) 658 if self._match(TokenType.L_PAREN, advance=False) 659 else self._parse_table_parts() 660 ) 661 662 def 
_parse_location_path(self) -> exp.Var: 663 parts = [self._advance_any(ignore_reserved=True)] 664 665 # We avoid consuming a comma token because external tables like @foo and @bar 666 # can be joined in a query with a comma separator, as well as closing paren 667 # in case of subqueries 668 while self._is_connected() and not self._match_set( 669 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 670 ): 671 parts.append(self._advance_any(ignore_reserved=True)) 672 673 return exp.var("".join(part.text for part in parts if part)) 674 675 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 676 this = super()._parse_lambda_arg() 677 678 if not this: 679 return this 680 681 typ = self._parse_types() 682 683 if typ: 684 return self.expression(exp.Cast, this=this, to=typ) 685 686 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
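A short usage sketch of this parser through the public API, exercising two Snowflake-specific constructs handled above (IFF and staged file paths):

import sqlglot

ast = sqlglot.parse_one("SELECT IFF(x > 0, 'pos', 'neg') FROM @mystage/data.csv", read="snowflake")
print(ast.sql(dialect="snowflake"))  # round-trips the staged file reference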
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
688 class Tokenizer(tokens.Tokenizer): 689 STRING_ESCAPES = ["\\", "'"] 690 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 691 RAW_STRINGS = ["$$"] 692 COMMENTS = ["--", "//", ("/*", "*/")] 693 694 KEYWORDS = { 695 **tokens.Tokenizer.KEYWORDS, 696 "BYTEINT": TokenType.INT, 697 "CHAR VARYING": TokenType.VARCHAR, 698 "CHARACTER VARYING": TokenType.VARCHAR, 699 "EXCLUDE": TokenType.EXCEPT, 700 "ILIKE ANY": TokenType.ILIKE_ANY, 701 "LIKE ANY": TokenType.LIKE_ANY, 702 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 703 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 704 "MINUS": TokenType.EXCEPT, 705 "NCHAR VARYING": TokenType.VARCHAR, 706 "PUT": TokenType.COMMAND, 707 "REMOVE": TokenType.COMMAND, 708 "RM": TokenType.COMMAND, 709 "SAMPLE": TokenType.TABLE_SAMPLE, 710 "SQL_DOUBLE": TokenType.DOUBLE, 711 "SQL_VARCHAR": TokenType.VARCHAR, 712 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 713 "TAG": TokenType.TAG, 714 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 715 "TOP": TokenType.TOP, 716 "WAREHOUSE": TokenType.WAREHOUSE, 717 "STREAMLIT": TokenType.STREAMLIT, 718 } 719 KEYWORDS.pop("/*+") 720 721 SINGLE_TOKENS = { 722 **tokens.Tokenizer.SINGLE_TOKENS, 723 "$": TokenType.PARAMETER, 724 } 725 726 VAR_SINGLE_TOKENS = {"$"} 727 728 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
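The settings above can be observed by tokenizing directly (standard sqlglot API):

from sqlglot.dialects.snowflake import Snowflake

# $$...$$ is a raw string and $1 a parameter under this tokenizer
for token in Snowflake().tokenize("SELECT $1, $$C:\\temp$$"):
    print(token.token_type, repr(token.text))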
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
730 class Generator(generator.Generator): 731 PARAMETER_TOKEN = "$" 732 MATCHED_BY_SOURCE = False 733 SINGLE_STRING_INTERVAL = True 734 JOIN_HINTS = False 735 TABLE_HINTS = False 736 QUERY_HINTS = False 737 AGGREGATE_FILTER_SUPPORTED = False 738 SUPPORTS_TABLE_COPY = False 739 COLLATE_IS_FUNC = True 740 LIMIT_ONLY_LITERALS = True 741 JSON_KEY_VALUE_PAIR_SEP = "," 742 INSERT_OVERWRITE = " OVERWRITE INTO" 743 STRUCT_DELIMITER = ("(", ")") 744 COPY_PARAMS_ARE_WRAPPED = False 745 COPY_PARAMS_EQ_REQUIRED = True 746 STAR_EXCEPT = "EXCLUDE" 747 SUPPORTS_EXPLODING_PROJECTIONS = False 748 ARRAY_CONCAT_IS_VAR_LEN = False 749 SUPPORTS_CONVERT_TIMEZONE = True 750 751 TRANSFORMS = { 752 **generator.Generator.TRANSFORMS, 753 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 754 exp.ArgMax: rename_func("MAX_BY"), 755 exp.ArgMin: rename_func("MIN_BY"), 756 exp.Array: inline_array_sql, 757 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 758 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 759 exp.AtTimeZone: lambda self, e: self.func( 760 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 761 ), 762 exp.BitwiseXor: rename_func("BITXOR"), 763 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 764 exp.DateAdd: date_delta_sql("DATEADD"), 765 exp.DateDiff: date_delta_sql("DATEDIFF"), 766 exp.DateStrToDate: datestrtodate_sql, 767 exp.DayOfMonth: rename_func("DAYOFMONTH"), 768 exp.DayOfWeek: rename_func("DAYOFWEEK"), 769 exp.DayOfYear: rename_func("DAYOFYEAR"), 770 exp.Explode: rename_func("FLATTEN"), 771 exp.Extract: rename_func("DATE_PART"), 772 exp.FromTimeZone: lambda self, e: self.func( 773 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 774 ), 775 exp.GenerateSeries: lambda self, e: self.func( 776 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 777 ), 778 exp.GroupConcat: rename_func("LISTAGG"), 779 exp.If: if_sql(name="IFF", false_value="NULL"), 780 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 781 exp.JSONExtractScalar: lambda self, e: self.func( 782 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 783 ), 784 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 785 exp.JSONPathRoot: lambda *_: "", 786 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 787 exp.LogicalOr: rename_func("BOOLOR_AGG"), 788 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 789 exp.Max: max_or_greatest, 790 exp.Min: min_or_least, 791 exp.ParseJSON: lambda self, e: self.func( 792 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 793 ), 794 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 795 exp.PercentileCont: transforms.preprocess( 796 [transforms.add_within_group_for_percentiles] 797 ), 798 exp.PercentileDisc: transforms.preprocess( 799 [transforms.add_within_group_for_percentiles] 800 ), 801 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 802 exp.RegexpILike: _regexpilike_sql, 803 exp.Rand: rename_func("RANDOM"), 804 exp.Select: transforms.preprocess( 805 [ 806 transforms.eliminate_distinct_on, 807 transforms.explode_to_unnest(), 808 transforms.eliminate_semi_and_anti_joins, 809 _unnest_generate_date_array, 810 ] 811 ), 812 exp.SHA: rename_func("SHA1"), 813 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 814 exp.StartsWith: rename_func("STARTSWITH"), 815 exp.StrPosition: lambda self, e: self.func( 816 "POSITION", e.args.get("substr"), e.this, 
e.args.get("position") 817 ), 818 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 819 exp.Stuff: rename_func("INSERT"), 820 exp.TimeAdd: date_delta_sql("TIMEADD"), 821 exp.TimestampDiff: lambda self, e: self.func( 822 "TIMESTAMPDIFF", e.unit, e.expression, e.this 823 ), 824 exp.TimestampTrunc: timestamptrunc_sql(), 825 exp.TimeStrToTime: timestrtotime_sql, 826 exp.TimeToStr: lambda self, e: self.func( 827 "TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e) 828 ), 829 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 830 exp.ToArray: rename_func("TO_ARRAY"), 831 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 832 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 833 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 834 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 835 exp.TsOrDsToDate: lambda self, e: self.func( 836 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 837 ), 838 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 839 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 840 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 841 exp.Xor: rename_func("BOOLXOR"), 842 } 843 844 SUPPORTED_JSON_PATH_PARTS = { 845 exp.JSONPathKey, 846 exp.JSONPathRoot, 847 exp.JSONPathSubscript, 848 } 849 850 TYPE_MAPPING = { 851 **generator.Generator.TYPE_MAPPING, 852 exp.DataType.Type.NESTED: "OBJECT", 853 exp.DataType.Type.STRUCT: "OBJECT", 854 } 855 856 PROPERTIES_LOCATION = { 857 **generator.Generator.PROPERTIES_LOCATION, 858 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 859 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 860 } 861 862 UNSUPPORTED_VALUES_EXPRESSIONS = { 863 exp.Map, 864 exp.StarMap, 865 exp.Struct, 866 exp.VarMap, 867 } 868 869 def with_properties(self, properties: exp.Properties) -> str: 870 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 871 872 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 873 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 874 values_as_table = False 875 876 return super().values_sql(expression, values_as_table=values_as_table) 877 878 def datatype_sql(self, expression: exp.DataType) -> str: 879 expressions = expression.expressions 880 if ( 881 expressions 882 and expression.is_type(*exp.DataType.STRUCT_TYPES) 883 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 884 ): 885 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 886 return "OBJECT" 887 888 return super().datatype_sql(expression) 889 890 def tonumber_sql(self, expression: exp.ToNumber) -> str: 891 return self.func( 892 "TO_NUMBER", 893 expression.this, 894 expression.args.get("format"), 895 expression.args.get("precision"), 896 expression.args.get("scale"), 897 ) 898 899 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 900 milli = expression.args.get("milli") 901 if milli is not None: 902 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 903 expression.set("nano", milli_to_nano) 904 905 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 906 907 def trycast_sql(self, expression: exp.TryCast) -> str: 908 value = expression.this 909 910 if value.type is None: 911 from sqlglot.optimizer.annotate_types import annotate_types 912 913 value = annotate_types(value) 914 915 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 916 return 
super().trycast_sql(expression) 917 918 # TRY_CAST only works for string values in Snowflake 919 return self.cast_sql(expression) 920 921 def log_sql(self, expression: exp.Log) -> str: 922 if not expression.expression: 923 return self.func("LN", expression.this) 924 925 return super().log_sql(expression) 926 927 def unnest_sql(self, expression: exp.Unnest) -> str: 928 unnest_alias = expression.args.get("alias") 929 offset = expression.args.get("offset") 930 931 columns = [ 932 exp.to_identifier("seq"), 933 exp.to_identifier("key"), 934 exp.to_identifier("path"), 935 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 936 seq_get(unnest_alias.columns if unnest_alias else [], 0) 937 or exp.to_identifier("value"), 938 exp.to_identifier("this"), 939 ] 940 941 if unnest_alias: 942 unnest_alias.set("columns", columns) 943 else: 944 unnest_alias = exp.TableAlias(this="_u", columns=columns) 945 946 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 947 alias = self.sql(unnest_alias) 948 alias = f" AS {alias}" if alias else "" 949 return f"{explode}{alias}" 950 951 def show_sql(self, expression: exp.Show) -> str: 952 terse = "TERSE " if expression.args.get("terse") else "" 953 history = " HISTORY" if expression.args.get("history") else "" 954 like = self.sql(expression, "like") 955 like = f" LIKE {like}" if like else "" 956 957 scope = self.sql(expression, "scope") 958 scope = f" {scope}" if scope else "" 959 960 scope_kind = self.sql(expression, "scope_kind") 961 if scope_kind: 962 scope_kind = f" IN {scope_kind}" 963 964 starts_with = self.sql(expression, "starts_with") 965 if starts_with: 966 starts_with = f" STARTS WITH {starts_with}" 967 968 limit = self.sql(expression, "limit") 969 970 from_ = self.sql(expression, "from") 971 if from_: 972 from_ = f" FROM {from_}" 973 974 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 975 976 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 977 # Other dialects don't support all of the following parameters, so we need to 978 # generate default values as necessary to ensure the transpilation is correct 979 group = expression.args.get("group") 980 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 981 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 982 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 983 984 return self.func( 985 "REGEXP_SUBSTR", 986 expression.this, 987 expression.expression, 988 position, 989 occurrence, 990 parameters, 991 group, 992 ) 993 994 def except_op(self, expression: exp.Except) -> str: 995 if not expression.args.get("distinct"): 996 self.unsupported("EXCEPT with All is not supported in Snowflake") 997 return super().except_op(expression) 998 999 def intersect_op(self, expression: exp.Intersect) -> str: 1000 if not expression.args.get("distinct"): 1001 self.unsupported("INTERSECT with All is not supported in Snowflake") 1002 return super().intersect_op(expression) 1003 1004 def describe_sql(self, expression: exp.Describe) -> str: 1005 # Default to table if kind is unknown 1006 kind_value = expression.args.get("kind") or "TABLE" 1007 kind = f" {kind_value}" if kind_value else "" 1008 this = f" {self.sql(expression, 'this')}" 1009 expressions = self.expressions(expression, flat=True) 1010 expressions = f" {expressions}" if expressions else "" 1011 return f"DESCRIBE{kind}{this}{expressions}" 1012 
1013 def generatedasidentitycolumnconstraint_sql( 1014 self, expression: exp.GeneratedAsIdentityColumnConstraint 1015 ) -> str: 1016 start = expression.args.get("start") 1017 start = f" START {start}" if start else "" 1018 increment = expression.args.get("increment") 1019 increment = f" INCREMENT {increment}" if increment else "" 1020 return f"AUTOINCREMENT{start}{increment}" 1021 1022 def swaptable_sql(self, expression: exp.SwapTable) -> str: 1023 this = self.sql(expression, "this") 1024 return f"SWAP WITH {this}" 1025 1026 def cluster_sql(self, expression: exp.Cluster) -> str: 1027 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1028 1029 def struct_sql(self, expression: exp.Struct) -> str: 1030 keys = [] 1031 values = [] 1032 1033 for i, e in enumerate(expression.expressions): 1034 if isinstance(e, exp.PropertyEQ): 1035 keys.append( 1036 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1037 ) 1038 values.append(e.expression) 1039 else: 1040 keys.append(exp.Literal.string(f"_{i}")) 1041 values.append(e) 1042 1043 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1044 1045 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1046 if expression.args.get("weight") or expression.args.get("accuracy"): 1047 self.unsupported( 1048 "APPROX_PERCENTILE with weight and/or accuracy arguments are not supported in Snowflake" 1049 ) 1050 1051 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1052 1053 def alterset_sql(self, expression: exp.AlterSet) -> str: 1054 exprs = self.expressions(expression, flat=True) 1055 exprs = f" {exprs}" if exprs else "" 1056 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1057 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1058 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1059 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1060 tag = self.expressions(expression, key="tag", flat=True) 1061 tag = f" TAG {tag}" if tag else "" 1062 1063 return f"SET{exprs}{file_format}{copy_options}{tag}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
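A compact sketch of generation with a few of these options (standard sqlglot API):

import sqlglot

ast = sqlglot.parse_one("select a, b from t where a > 1 and b < 2")
print(ast.sql(dialect="snowflake", pretty=True, identify=True))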
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_SET_OP
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql