sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.expressions import Literal
from sqlglot.helper import is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_to_timestamp(args: t.List) -> t.Union[exp.StrToTime, exp.UnixToTime, exp.TimeStrToTime]:
    if len(args) == 2:
        first_arg, second_arg = args
        if second_arg.is_string:
            # case: <string_expr> [ , <format> ]
            return build_formatted_time(exp.StrToTime, "snowflake")(args)
        return exp.UnixToTime(this=first_arg, scale=second_arg)

    from sqlglot.optimizer.simplify import simplify_literals

    # The first argument might be an expression like 40 * 365 * 86400, so we try to
    # reduce it using `simplify_literals` first and then check if it's a Literal.
    first_arg = seq_get(args, 0)
    if not isinstance(simplify_literals(first_arg, root=True), Literal):
        # case: <variant_expr> or other expressions such as columns
        return exp.TimeStrToTime.from_arg_list(args)

    if first_arg.is_string:
        if is_int(first_arg.this):
            # case: <integer>
            return exp.UnixToTime.from_arg_list(args)

        # case: <date_expr>
        return build_formatted_time(exp.StrToTime, "snowflake", default=True)(args)

    # case: <numeric_expr>
    return exp.UnixToTime.from_arg_list(args)


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            t.cast(exp.Condition, k).eq(v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=_map_date_part(seq_get(args, 0))
    )


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 1), expression=exp.Literal.number(0))
    true = exp.Literal.number(0)
    false = exp.Div(this=seq_get(args, 0), expression=seq_get(args, 1))
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _datatype_sql(self: Snowflake.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return "ARRAY"
    elif expression.is_type("map"):
        return "OBJECT"
    return self.datatype_sql(expression)


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )
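
# Illustrative doctest-style sketch (not part of the module): _build_to_timestamp
# above picks the AST node from the argument shape, so something like the
# following should hold:
#
#   >>> import sqlglot
#   >>> from sqlglot import exp
#   >>> sqlglot.parse_one("SELECT TO_TIMESTAMP(1659981729)", read="snowflake").find(exp.UnixToTime) is not None
#   True
#   >>> sqlglot.parse_one("SELECT TO_TIMESTAMP('2020-01-01', 'yyyy-mm-dd')", read="snowflake").find(exp.StrToTime) is not None
#   True
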
def _build_convert_timezone(args: t.List) -> t.Union[exp.Anonymous, exp.AtTimeZone]:
    if len(args) == 3:
        return exp.Anonymous(this="CONVERT_TIMEZONE", expressions=args)
    return exp.AtTimeZone(this=seq_get(args, 1), zone=seq_get(args, 0))


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


DATE_PART_MAPPING = {
    "Y": "YEAR",
    "YY": "YEAR",
    "YYY": "YEAR",
    "YYYY": "YEAR",
    "YR": "YEAR",
    "YEARS": "YEAR",
    "YRS": "YEAR",
    "MM": "MONTH",
    "MON": "MONTH",
    "MONS": "MONTH",
    "MONTHS": "MONTH",
    "D": "DAY",
    "DD": "DAY",
    "DAYS": "DAY",
    "DAYOFMONTH": "DAY",
    "WEEKDAY": "DAYOFWEEK",
    "DOW": "DAYOFWEEK",
    "DW": "DAYOFWEEK",
    "WEEKDAY_ISO": "DAYOFWEEKISO",
    "DOW_ISO": "DAYOFWEEKISO",
    "DW_ISO": "DAYOFWEEKISO",
    "YEARDAY": "DAYOFYEAR",
    "DOY": "DAYOFYEAR",
    "DY": "DAYOFYEAR",
    "W": "WEEK",
    "WK": "WEEK",
    "WEEKOFYEAR": "WEEK",
    "WOY": "WEEK",
    "WY": "WEEK",
    "WEEK_ISO": "WEEKISO",
    "WEEKOFYEARISO": "WEEKISO",
    "WEEKOFYEAR_ISO": "WEEKISO",
    "Q": "QUARTER",
    "QTR": "QUARTER",
    "QTRS": "QUARTER",
    "QUARTERS": "QUARTER",
    "H": "HOUR",
    "HH": "HOUR",
    "HR": "HOUR",
    "HOURS": "HOUR",
    "HRS": "HOUR",
    "M": "MINUTE",
    "MI": "MINUTE",
    "MIN": "MINUTE",
    "MINUTES": "MINUTE",
    "MINS": "MINUTE",
    "S": "SECOND",
    "SEC": "SECOND",
    "SECONDS": "SECOND",
    "SECS": "SECOND",
    "MS": "MILLISECOND",
    "MSEC": "MILLISECOND",
    "MILLISECONDS": "MILLISECOND",
    "US": "MICROSECOND",
    "USEC": "MICROSECOND",
    "MICROSECONDS": "MICROSECOND",
    "NS": "NANOSECOND",
    "NSEC": "NANOSECOND",
    "NANOSEC": "NANOSECOND",
    "NSECOND": "NANOSECOND",
    "NSECONDS": "NANOSECOND",
    "NANOSECS": "NANOSECOND",
    "EPOCH": "EPOCH_SECOND",
    "EPOCH_SECONDS": "EPOCH_SECOND",
    "EPOCH_MILLISECONDS": "EPOCH_MILLISECOND",
    "EPOCH_MICROSECONDS": "EPOCH_MICROSECOND",
    "EPOCH_NANOSECONDS": "EPOCH_NANOSECOND",
    "TZH": "TIMEZONE_HOUR",
    "TZM": "TIMEZONE_MINUTE",
}


@t.overload
def _map_date_part(part: exp.Expression) -> exp.Var:
    pass


@t.overload
def _map_date_part(part: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
    pass


def _map_date_part(part):
    mapped = DATE_PART_MAPPING.get(part.name.upper()) if part else None
    return exp.var(mapped) if mapped else part


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", _map_date_part(trunc.args["unit"]))
    return trunc


def _build_timestamp_from_parts(args: t.List) -> exp.Func:
    if len(args) == 2:
        # Other dialects don't have the TIMESTAMP_FROM_PARTS(date, time) concept,
        # so we parse this into Anonymous for now instead of introducing complexity
        return exp.Anonymous(this="TIMESTAMP_FROM_PARTS", expressions=args)

    return exp.TimestampFromParts.from_arg_list(args)
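
# Illustrative sketch (not part of the module): _map_date_part canonicalizes the
# many Snowflake date-part spellings via DATE_PART_MAPPING:
#
#   >>> from sqlglot import exp
#   >>> _map_date_part(exp.var("mm")).name
#   'MONTH'
#   >>> _map_date_part(exp.var("dow_iso")).name
#   'DAYOFWEEKISO'
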
def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
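
    # Hedged example (not part of the module): because of the override above, an
    # unquoted DUAL table name survives generation even with identify=True:
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT 1 FROM dual", read="snowflake", write="snowflake", identify=True)[0]
    #   'SELECT 1 FROM dual'
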
    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAYAGG": exp.ArrayAgg.from_arg_list,
            "ARRAY_CONSTRUCT": exp.Array.from_arg_list,
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list,
            "BITXOR": binary_from_function(exp.BitwiseXor),
            "BIT_XOR": binary_from_function(exp.BitwiseXor),
            "BOOLXOR": binary_from_function(exp.Xor),
            "CONVERT_TIMEZONE": _build_convert_timezone,
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": lambda args: exp.DateAdd(
                this=seq_get(args, 2),
                expression=seq_get(args, 1),
                unit=_map_date_part(seq_get(args, 0)),
            ),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1))
            ),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": _build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts,
            "TO_TIMESTAMP": _build_to_timestamp,
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
            TokenType.COLON: lambda self, this: self._parse_colon_get_path(this),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
            "SWAP": lambda self: self._parse_alter_table_swap(),
        }
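
        # Hedged example (not part of the module): several FUNCTIONS entries above
        # reorder Snowflake's (unit, amount, date) argument layout into sqlglot's
        # canonical AST, e.g. DATEADD stores the date operand as `this`:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> node = sqlglot.parse_one("SELECT DATEADD(day, 5, d)", read="snowflake").find(exp.DateAdd)
        #   >>> node.this.sql(dialect="snowflake")
        #   'd'
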
        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location(),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        def _parse_colon_get_path(
            self: parser.Parser, this: t.Optional[exp.Expression]
        ) -> t.Optional[exp.Expression]:
            while True:
                path = self._parse_bitwise()

                # The cast :: operator has a lower precedence than the extraction operator :, so
                # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH
                if isinstance(path, exp.Cast):
                    target_type = path.to
                    path = path.this
                else:
                    target_type = None

                if isinstance(path, exp.Expression):
                    path = exp.Literal.string(path.sql(dialect="snowflake"))

                # The extraction operator : is left-associative
                this = self.expression(
                    exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path)
                )

                if target_type:
                    this = exp.cast(this, target_type)

                if not self._match(TokenType.COLON):
                    break

            return self._parse_range(this)

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = _map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)
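
        # Hedged example (not part of the module): per _parse_date_part above,
        # EPOCH_* parts become a UNIX-timestamp computation rather than an EXTRACT:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> tree = sqlglot.parse_one("SELECT DATE_PART(epoch_millisecond, col)", read="snowflake")
        #   >>> tree.find(exp.TimeToUnix) is not None, tree.find(exp.Extract) is None
        #   (True, True)
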
        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True))

        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_at_before(self, table: exp.Table) -> exp.Table:
            # https://docs.snowflake.com/en/sql-reference/constructs/at-before
            index = self._index
            if self._match_texts(("AT", "BEFORE")):
                this = self._prev.text.upper()
                kind = (
                    self._match(TokenType.L_PAREN)
                    and self._match_texts(self.HISTORICAL_DATA_KIND)
                    and self._prev.text.upper()
                )
                expression = self._match(TokenType.FARROW) and self._parse_bitwise()

                if expression:
                    self._match_r_paren()
                    when = self.expression(
                        exp.HistoricalData, this=this, kind=kind, expression=expression
                    )
                    table.set("when", when)
                else:
                    self._retreat(index)

            return table

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                self._match(TokenType.L_PAREN)
                while self._curr and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return self._parse_at_before(table)

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
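
        # Hedged example (not part of the module): _parse_table_parts above lets a
        # staged file reference appear where a table is expected, with optional
        # FILE_FORMAT / PATTERN arguments attached to the resulting exp.Table:
        #
        #   >>> import sqlglot
        #   >>> from sqlglot import exp
        #   >>> tbl = sqlglot.parse_one("SELECT * FROM @mystage (PATTERN => '.*csv')", read="snowflake").find(exp.Table)
        #   >>> tbl.text("pattern")
        #   '.*csv'
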
        def _parse_show_snowflake(self, this: str) -> exp.Show:
            scope = None
            scope_kind = None

            # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
            # which is syntactically valid but has no effect on the output
            terse = self._tokens[self._index - 2].text.upper() == "TERSE"

            history = self._match_text_seq("HISTORY")

            like = self._parse_string() if self._match(TokenType.LIKE) else None

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_alter_table_swap(self) -> exp.SwapTable:
            self._match_text_seq("WITH")
            return self.expression(exp.SwapTable, this=self._parse_table(schema=True))

        def _parse_location(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator.
            while self._is_connected() and not self._match(TokenType.COMMA, advance=False):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RENAME": TokenType.REPLACE,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ,
            "TIMESTAMP_NTZ": TokenType.TIMESTAMP,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TIMESTAMPNTZ": TokenType.TIMESTAMP,
            "TOP": TokenType.TOP,
        }

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
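
    # Hedged example (not part of the module): SHOW is removed from the Tokenizer's
    # COMMANDS above precisely so statements like the following are parsed into a
    # structured exp.Show instead of an opaque command:
    #
    #   >>> import sqlglot
    #   >>> show = sqlglot.parse_one("SHOW TERSE TABLES IN SCHEMA db1.s1", read="snowflake")
    #   >>> show.args["terse"], show.args["scope_kind"]
    #   (True, 'SCHEMA')
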
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: rename_func("ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayJoin: rename_func("ARRAY_TO_STRING"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DataType: _datatype_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression),
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)),
            exp.Struct: lambda self, e: self.func(
                "OBJECT_CONSTRUCT",
                *(arg for expression in e.expressions for arg in expression.flatten()),
            ),
            exp.Stuff: rename_func("INSERT"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql,
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToStr: lambda self, e: self.func(
                "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e)
            ),
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }
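
        # Hedged example (not part of the module): TRANSFORMS drives generation
        # into Snowflake syntax, e.g. a generic IF is expected to become IFF:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT IF(cond, 1, 2)", read="duckdb", write="snowflake")[0]
        #   'SELECT IFF(cond, 1, 2)'
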
        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ",
        }

        STAR_MAPPING = {
            "except": "EXCLUDE",
            "replace": "RENAME",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            # Other dialects don't support all of the following parameters, so we need to
            # generate default values as necessary to ensure the transpilation is correct
            group = expression.args.get("group")
            parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
            occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
            position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

            return self.func(
                "REGEXP_SUBSTR",
                expression.this,
                expression.expression,
                position,
                occurrence,
                parameters,
                group,
            )
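
        # Hedged example (not part of the module): regexpextract_sql back-fills the
        # optional REGEXP_SUBSTR arguments so positional parameters stay aligned;
        # a RegexpExtract carrying only a group number should come out roughly as:
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT REGEXP_EXTRACT(a, 'p', 1)", read="duckdb", write="snowflake")[0]
        #   "SELECT REGEXP_SUBSTR(a, 'p', 1, 1, 'c', 1)"
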
        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT with All is not supported in Snowflake")
            return super().except_op(expression)

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT with All is not supported in Snowflake")
            return super().intersect_op(expression)

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def swaptable_sql(self, expression: exp.SwapTable) -> str:
            this = self.sql(expression, "this")
            return f"SWAP WITH {this}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ")

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
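
For orientation, a brief usage sketch (hedged: the flow below is the standard sqlglot API, but exact output strings may vary by version):

    import sqlglot

    # Parse Snowflake SQL into sqlglot's canonical AST ...
    ast = sqlglot.parse_one("SELECT IFF(a > 0, 'pos', 'neg') FROM t", read="snowflake")

    # ... and generate SQL back for Snowflake or for another dialect.
    print(ast.sql(dialect="snowflake"))
    print(sqlglot.transpile("SELECT TO_VARCHAR(d)", read="snowflake", write="duckdb")[0])
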
Specifies the strategy according to which identifiers should be normalized.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example,
    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;
will be rewritten as
    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
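A sketch of how this flag surfaces programmatically, assuming the qualify optimizer pass is what applies the rewrite:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
    # Under the Snowflake dialect, the CTE alias column c should be pushed onto
    # the SUM(a) projection during qualification, as in the rewrite above.
    print(qualify(sqlglot.parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))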
Associates this dialect's time formats with their equivalent Python strftime formats.
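For example, a Snowflake format string is translated through this mapping when transpiling to a strftime-based dialect (a sketch; the exact output depends on the target dialect's conventions):

    import sqlglot

    # 'yyyy-mm-dd' maps to the strftime format '%Y-%m-%d', so DuckDB should
    # receive something like STRPTIME('2024-01-02', '%Y-%m-%d').
    print(sqlglot.transpile("SELECT TO_TIMESTAMP('2024-01-02', 'yyyy-mm-dd')", read="snowflake", write="duckdb")[0])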
301 def quote_identifier(self, expression: E, identify: bool = True) -> E: 302 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 303 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 304 if ( 305 isinstance(expression, exp.Identifier) 306 and isinstance(expression.parent, exp.Table) 307 and expression.name.lower() == "dual" 308 ): 309 return expression # type: ignore 310 311 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
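In practice this means the special DUAL table survives even aggressive quoting; a minimal sketch:

    import sqlglot

    # identify=True asks the generator to quote every identifier, but the
    # override above leaves an unquoted DUAL table name alone.
    print(sqlglot.transpile("SELECT a FROM DUAL", read="snowflake", write="snowflake", identify=True)[0])
    # Expected shape: SELECT "a" FROM DUAL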
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
313 class Parser(parser.Parser): 314 IDENTIFY_PIVOT_STRINGS = True 315 316 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 317 318 FUNCTIONS = { 319 **parser.Parser.FUNCTIONS, 320 "ARRAYAGG": exp.ArrayAgg.from_arg_list, 321 "ARRAY_CONSTRUCT": exp.Array.from_arg_list, 322 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 323 this=seq_get(args, 1), expression=seq_get(args, 0) 324 ), 325 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 326 # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive 327 start=seq_get(args, 0), 328 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 329 step=seq_get(args, 2), 330 ), 331 "ARRAY_TO_STRING": exp.ArrayJoin.from_arg_list, 332 "BITXOR": binary_from_function(exp.BitwiseXor), 333 "BIT_XOR": binary_from_function(exp.BitwiseXor), 334 "BOOLXOR": binary_from_function(exp.Xor), 335 "CONVERT_TIMEZONE": _build_convert_timezone, 336 "DATE_TRUNC": _date_trunc_to_time, 337 "DATEADD": lambda args: exp.DateAdd( 338 this=seq_get(args, 2), 339 expression=seq_get(args, 1), 340 unit=_map_date_part(seq_get(args, 0)), 341 ), 342 "DATEDIFF": _build_datediff, 343 "DIV0": _build_if_from_div0, 344 "FLATTEN": exp.Explode.from_arg_list, 345 "GET_PATH": lambda args, dialect: exp.JSONExtract( 346 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 347 ), 348 "IFF": exp.If.from_arg_list, 349 "LAST_DAY": lambda args: exp.LastDay( 350 this=seq_get(args, 0), unit=_map_date_part(seq_get(args, 1)) 351 ), 352 "LISTAGG": exp.GroupConcat.from_arg_list, 353 "NULLIFZERO": _build_if_from_nullifzero, 354 "OBJECT_CONSTRUCT": _build_object_construct, 355 "REGEXP_REPLACE": _build_regexp_replace, 356 "REGEXP_SUBSTR": exp.RegexpExtract.from_arg_list, 357 "RLIKE": exp.RegexpLike.from_arg_list, 358 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 359 "TIMEDIFF": _build_datediff, 360 "TIMESTAMPDIFF": _build_datediff, 361 "TIMESTAMPFROMPARTS": _build_timestamp_from_parts, 362 "TIMESTAMP_FROM_PARTS": _build_timestamp_from_parts, 363 "TO_TIMESTAMP": _build_to_timestamp, 364 "TO_VARCHAR": exp.ToChar.from_arg_list, 365 "ZEROIFNULL": _build_if_from_zeroifnull, 366 } 367 368 FUNCTION_PARSERS = { 369 **parser.Parser.FUNCTION_PARSERS, 370 "DATE_PART": lambda self: self._parse_date_part(), 371 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 372 } 373 FUNCTION_PARSERS.pop("TRIM") 374 375 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 376 377 RANGE_PARSERS = { 378 **parser.Parser.RANGE_PARSERS, 379 TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny), 380 TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny), 381 TokenType.COLON: lambda self, this: self._parse_colon_get_path(this), 382 } 383 384 ALTER_PARSERS = { 385 **parser.Parser.ALTER_PARSERS, 386 "SET": lambda self: self._parse_set(tag=self._match_text_seq("TAG")), 387 "UNSET": lambda self: self.expression( 388 exp.Set, 389 tag=self._match_text_seq("TAG"), 390 expressions=self._parse_csv(self._parse_id_var), 391 unset=True, 392 ), 393 "SWAP": lambda self: self._parse_alter_table_swap(), 394 } 395 396 STATEMENT_PARSERS = { 397 **parser.Parser.STATEMENT_PARSERS, 398 TokenType.SHOW: lambda self: self._parse_show(), 399 } 400 401 PROPERTY_PARSERS = { 402 **parser.Parser.PROPERTY_PARSERS, 403 "LOCATION": lambda self: self._parse_location(), 404 } 405 406 SHOW_PARSERS = { 407 "SCHEMAS": _show_parser("SCHEMAS"), 408 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 409 "OBJECTS":
_show_parser("OBJECTS"), 410 "TERSE OBJECTS": _show_parser("OBJECTS"), 411 "TABLES": _show_parser("TABLES"), 412 "TERSE TABLES": _show_parser("TABLES"), 413 "VIEWS": _show_parser("VIEWS"), 414 "TERSE VIEWS": _show_parser("VIEWS"), 415 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 416 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 417 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 418 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 419 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 420 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 421 "SEQUENCES": _show_parser("SEQUENCES"), 422 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 423 "COLUMNS": _show_parser("COLUMNS"), 424 "USERS": _show_parser("USERS"), 425 "TERSE USERS": _show_parser("USERS"), 426 } 427 428 STAGED_FILE_SINGLE_TOKENS = { 429 TokenType.DOT, 430 TokenType.MOD, 431 TokenType.SLASH, 432 } 433 434 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 435 436 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 437 438 def _parse_colon_get_path( 439 self: parser.Parser, this: t.Optional[exp.Expression] 440 ) -> t.Optional[exp.Expression]: 441 while True: 442 path = self._parse_bitwise() 443 444 # The cast :: operator has a lower precedence than the extraction operator :, so 445 # we rearrange the AST appropriately to avoid casting the 2nd argument of GET_PATH 446 if isinstance(path, exp.Cast): 447 target_type = path.to 448 path = path.this 449 else: 450 target_type = None 451 452 if isinstance(path, exp.Expression): 453 path = exp.Literal.string(path.sql(dialect="snowflake")) 454 455 # The extraction operator : is left-associative 456 this = self.expression( 457 exp.JSONExtract, this=this, expression=self.dialect.to_json_path(path) 458 ) 459 460 if target_type: 461 this = exp.cast(this, target_type) 462 463 if not self._match(TokenType.COLON): 464 break 465 466 return self._parse_range(this) 467 468 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 469 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 470 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 471 this = self._parse_var() or self._parse_type() 472 473 if not this: 474 return None 475 476 self._match(TokenType.COMMA) 477 expression = self._parse_bitwise() 478 this = _map_date_part(this) 479 name = this.name.upper() 480 481 if name.startswith("EPOCH"): 482 if name == "EPOCH_MILLISECOND": 483 scale = 10**3 484 elif name == "EPOCH_MICROSECOND": 485 scale = 10**6 486 elif name == "EPOCH_NANOSECOND": 487 scale = 10**9 488 else: 489 scale = None 490 491 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 492 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 493 494 if scale: 495 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 496 497 return to_unix 498 499 return self.expression(exp.Extract, this=this, expression=expression) 500 501 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 502 if is_map: 503 # Keys are strings in Snowflake's objects, see also: 504 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 505 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 506 return self._parse_slice(self._parse_string()) 507 508 return self._parse_slice(self._parse_alias(self._parse_conjunction(), explicit=True)) 509 510 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 511 
lateral = super()._parse_lateral() 512 if not lateral: 513 return lateral 514 515 if isinstance(lateral.this, exp.Explode): 516 table_alias = lateral.args.get("alias") 517 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 518 if table_alias and not table_alias.args.get("columns"): 519 table_alias.set("columns", columns) 520 elif not table_alias: 521 exp.alias_(lateral, "_flattened", table=columns, copy=False) 522 523 return lateral 524 525 def _parse_at_before(self, table: exp.Table) -> exp.Table: 526 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 527 index = self._index 528 if self._match_texts(("AT", "BEFORE")): 529 this = self._prev.text.upper() 530 kind = ( 531 self._match(TokenType.L_PAREN) 532 and self._match_texts(self.HISTORICAL_DATA_KIND) 533 and self._prev.text.upper() 534 ) 535 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 536 537 if expression: 538 self._match_r_paren() 539 when = self.expression( 540 exp.HistoricalData, this=this, kind=kind, expression=expression 541 ) 542 table.set("when", when) 543 else: 544 self._retreat(index) 545 546 return table 547 548 def _parse_table_parts( 549 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 550 ) -> exp.Table: 551 # https://docs.snowflake.com/en/user-guide/querying-stage 552 if self._match(TokenType.STRING, advance=False): 553 table = self._parse_string() 554 elif self._match_text_seq("@", advance=False): 555 table = self._parse_location_path() 556 else: 557 table = None 558 559 if table: 560 file_format = None 561 pattern = None 562 563 self._match(TokenType.L_PAREN) 564 while self._curr and not self._match(TokenType.R_PAREN): 565 if self._match_text_seq("FILE_FORMAT", "=>"): 566 file_format = self._parse_string() or super()._parse_table_parts( 567 is_db_reference=is_db_reference 568 ) 569 elif self._match_text_seq("PATTERN", "=>"): 570 pattern = self._parse_string() 571 else: 572 break 573 574 self._match(TokenType.COMMA) 575 576 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 577 else: 578 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 579 580 return self._parse_at_before(table) 581 582 def _parse_id_var( 583 self, 584 any_token: bool = True, 585 tokens: t.Optional[t.Collection[TokenType]] = None, 586 ) -> t.Optional[exp.Expression]: 587 if self._match_text_seq("IDENTIFIER", "("): 588 identifier = ( 589 super()._parse_id_var(any_token=any_token, tokens=tokens) 590 or self._parse_string() 591 ) 592 self._match_r_paren() 593 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 594 595 return super()._parse_id_var(any_token=any_token, tokens=tokens) 596 597 def _parse_show_snowflake(self, this: str) -> exp.Show: 598 scope = None 599 scope_kind = None 600 601 # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 602 # which is syntactically valid but has no effect on the output 603 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 604 605 history = self._match_text_seq("HISTORY") 606 607 like = self._parse_string() if self._match(TokenType.LIKE) else None 608 609 if self._match(TokenType.IN): 610 if self._match_text_seq("ACCOUNT"): 611 scope_kind = "ACCOUNT" 612 elif self._match_set(self.DB_CREATABLES): 613 scope_kind = self._prev.text.upper() 614 if self._curr: 615 scope = self._parse_table_parts() 616 elif self._curr: 617 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 618 scope =
self._parse_table_parts() 619 620 return self.expression( 621 exp.Show, 622 **{ 623 "terse": terse, 624 "this": this, 625 "history": history, 626 "like": like, 627 "scope": scope, 628 "scope_kind": scope_kind, 629 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 630 "limit": self._parse_limit(), 631 "from": self._parse_string() if self._match(TokenType.FROM) else None, 632 }, 633 ) 634 635 def _parse_alter_table_swap(self) -> exp.SwapTable: 636 self._match_text_seq("WITH") 637 return self.expression(exp.SwapTable, this=self._parse_table(schema=True)) 638 639 def _parse_location(self) -> exp.LocationProperty: 640 self._match(TokenType.EQ) 641 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 642 643 def _parse_location_path(self) -> exp.Var: 644 parts = [self._advance_any(ignore_reserved=True)] 645 646 # We avoid consuming a comma token because external tables like @foo and @bar 647 # can be joined in a query with a comma separator. 648 while self._is_connected() and not self._match(TokenType.COMMA, advance=False): 649 parts.append(self._advance_any(ignore_reserved=True)) 650 651 return exp.var("".join(part.text for part in parts if part))
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE.
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100.
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3.
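A couple of sketches of Snowflake-specific parses handled by this class (expected shapes are shown as comments, not verified transcripts):

    import sqlglot

    # The : extraction operator binds tighter than the :: cast operator, so the
    # cast is applied to the extracted value rather than to the path argument.
    print(sqlglot.transpile("SELECT col:a.b::INT FROM t", read="snowflake", write="snowflake")[0])
    # Expected shape: SELECT CAST(GET_PATH(col, 'a.b') AS INT) FROM t

    # DIV0 is parsed into an IF expression guarding against division by zero,
    # which other dialects can then render with their own conditional syntax.
    print(sqlglot.transpile("SELECT DIV0(x, y)", read="snowflake", write="duckdb")[0])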
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- CONSTRAINT_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
653 class Tokenizer(tokens.Tokenizer): 654 STRING_ESCAPES = ["\\", "'"] 655 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 656 RAW_STRINGS = ["$$"] 657 COMMENTS = ["--", "//", ("/*", "*/")] 658 659 KEYWORDS = { 660 **tokens.Tokenizer.KEYWORDS, 661 "BYTEINT": TokenType.INT, 662 "CHAR VARYING": TokenType.VARCHAR, 663 "CHARACTER VARYING": TokenType.VARCHAR, 664 "EXCLUDE": TokenType.EXCEPT, 665 "ILIKE ANY": TokenType.ILIKE_ANY, 666 "LIKE ANY": TokenType.LIKE_ANY, 667 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 668 "MINUS": TokenType.EXCEPT, 669 "NCHAR VARYING": TokenType.VARCHAR, 670 "PUT": TokenType.COMMAND, 671 "REMOVE": TokenType.COMMAND, 672 "RENAME": TokenType.REPLACE, 673 "RM": TokenType.COMMAND, 674 "SAMPLE": TokenType.TABLE_SAMPLE, 675 "SQL_DOUBLE": TokenType.DOUBLE, 676 "SQL_VARCHAR": TokenType.VARCHAR, 677 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 678 "TIMESTAMP_LTZ": TokenType.TIMESTAMPLTZ, 679 "TIMESTAMP_NTZ": TokenType.TIMESTAMP, 680 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 681 "TIMESTAMPNTZ": TokenType.TIMESTAMP, 682 "TOP": TokenType.TOP, 683 } 684 685 SINGLE_TOKENS = { 686 **tokens.Tokenizer.SINGLE_TOKENS, 687 "$": TokenType.PARAMETER, 688 } 689 690 VAR_SINGLE_TOKENS = {"$"} 691 692 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
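A sketch of two tokenizer quirks declared above, raw $$ strings and // line comments:

    import sqlglot

    # $$...$$ delimits a raw string and // starts a line comment in Snowflake.
    sql = "SELECT $$a 'raw' string$$ // trailing comment"
    for token in sqlglot.tokenize(sql, read="snowflake"):
        print(token.token_type, repr(token.text))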
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
694 class Generator(generator.Generator): 695 PARAMETER_TOKEN = "$" 696 MATCHED_BY_SOURCE = False 697 SINGLE_STRING_INTERVAL = True 698 JOIN_HINTS = False 699 TABLE_HINTS = False 700 QUERY_HINTS = False 701 AGGREGATE_FILTER_SUPPORTED = False 702 SUPPORTS_TABLE_COPY = False 703 COLLATE_IS_FUNC = True 704 LIMIT_ONLY_LITERALS = True 705 JSON_KEY_VALUE_PAIR_SEP = "," 706 INSERT_OVERWRITE = " OVERWRITE INTO" 707 708 TRANSFORMS = { 709 **generator.Generator.TRANSFORMS, 710 exp.ArgMax: rename_func("MAX_BY"), 711 exp.ArgMin: rename_func("MIN_BY"), 712 exp.Array: inline_array_sql, 713 exp.ArrayConcat: rename_func("ARRAY_CAT"), 714 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 715 exp.ArrayJoin: rename_func("ARRAY_TO_STRING"), 716 exp.AtTimeZone: lambda self, e: self.func( 717 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 718 ), 719 exp.BitwiseXor: rename_func("BITXOR"), 720 exp.DateAdd: date_delta_sql("DATEADD"), 721 exp.DateDiff: date_delta_sql("DATEDIFF"), 722 exp.DateStrToDate: datestrtodate_sql, 723 exp.DataType: _datatype_sql, 724 exp.DayOfMonth: rename_func("DAYOFMONTH"), 725 exp.DayOfWeek: rename_func("DAYOFWEEK"), 726 exp.DayOfYear: rename_func("DAYOFYEAR"), 727 exp.Explode: rename_func("FLATTEN"), 728 exp.Extract: rename_func("DATE_PART"), 729 exp.FromTimeZone: lambda self, e: self.func( 730 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 731 ), 732 exp.GenerateSeries: lambda self, e: self.func( 733 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 734 ), 735 exp.GroupConcat: rename_func("LISTAGG"), 736 exp.If: if_sql(name="IFF", false_value="NULL"), 737 exp.JSONExtract: lambda self, e: self.func("GET_PATH", e.this, e.expression), 738 exp.JSONExtractScalar: lambda self, e: self.func( 739 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 740 ), 741 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 742 exp.JSONPathRoot: lambda *_: "", 743 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 744 exp.LogicalOr: rename_func("BOOLOR_AGG"), 745 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 746 exp.Max: max_or_greatest, 747 exp.Min: min_or_least, 748 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 749 exp.PercentileCont: transforms.preprocess( 750 [transforms.add_within_group_for_percentiles] 751 ), 752 exp.PercentileDisc: transforms.preprocess( 753 [transforms.add_within_group_for_percentiles] 754 ), 755 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 756 exp.RegexpILike: _regexpilike_sql, 757 exp.Rand: rename_func("RANDOM"), 758 exp.Select: transforms.preprocess( 759 [ 760 transforms.eliminate_distinct_on, 761 transforms.explode_to_unnest(), 762 transforms.eliminate_semi_and_anti_joins, 763 ] 764 ), 765 exp.SHA: rename_func("SHA1"), 766 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 767 exp.StartsWith: rename_func("STARTSWITH"), 768 exp.StrPosition: lambda self, e: self.func( 769 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 770 ), 771 exp.StrToTime: lambda self, e: self.func("TO_TIMESTAMP", e.this, self.format_time(e)), 772 exp.Struct: lambda self, e: self.func( 773 "OBJECT_CONSTRUCT", 774 *(arg for expression in e.expressions for arg in expression.flatten()), 775 ), 776 exp.Stuff: rename_func("INSERT"), 777 exp.TimestampDiff: lambda self, e: self.func( 778 "TIMESTAMPDIFF", e.unit, e.expression, e.this 779 ), 780 exp.TimestampTrunc: timestamptrunc_sql, 781 exp.TimeStrToTime: timestrtotime_sql, 782 
exp.TimeToStr: lambda self, e: self.func( 783 "TO_CHAR", exp.cast(e.this, "timestamp"), self.format_time(e) 784 ), 785 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 786 exp.ToArray: rename_func("TO_ARRAY"), 787 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 788 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 789 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 790 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 791 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 792 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 793 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 794 exp.Xor: rename_func("BOOLXOR"), 795 } 796 797 SUPPORTED_JSON_PATH_PARTS = { 798 exp.JSONPathKey, 799 exp.JSONPathRoot, 800 exp.JSONPathSubscript, 801 } 802 803 TYPE_MAPPING = { 804 **generator.Generator.TYPE_MAPPING, 805 exp.DataType.Type.TIMESTAMP: "TIMESTAMPNTZ", 806 } 807 808 STAR_MAPPING = { 809 "except": "EXCLUDE", 810 "replace": "RENAME", 811 } 812 813 PROPERTIES_LOCATION = { 814 **generator.Generator.PROPERTIES_LOCATION, 815 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 816 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 817 } 818 819 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 820 milli = expression.args.get("milli") 821 if milli is not None: 822 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 823 expression.set("nano", milli_to_nano) 824 825 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 826 827 def trycast_sql(self, expression: exp.TryCast) -> str: 828 value = expression.this 829 830 if value.type is None: 831 from sqlglot.optimizer.annotate_types import annotate_types 832 833 value = annotate_types(value) 834 835 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 836 return super().trycast_sql(expression) 837 838 # TRY_CAST only works for string values in Snowflake 839 return self.cast_sql(expression) 840 841 def log_sql(self, expression: exp.Log) -> str: 842 if not expression.expression: 843 return self.func("LN", expression.this) 844 845 return super().log_sql(expression) 846 847 def unnest_sql(self, expression: exp.Unnest) -> str: 848 unnest_alias = expression.args.get("alias") 849 offset = expression.args.get("offset") 850 851 columns = [ 852 exp.to_identifier("seq"), 853 exp.to_identifier("key"), 854 exp.to_identifier("path"), 855 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 856 seq_get(unnest_alias.columns if unnest_alias else [], 0) 857 or exp.to_identifier("value"), 858 exp.to_identifier("this"), 859 ] 860 861 if unnest_alias: 862 unnest_alias.set("columns", columns) 863 else: 864 unnest_alias = exp.TableAlias(this="_u", columns=columns) 865 866 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 867 alias = self.sql(unnest_alias) 868 alias = f" AS {alias}" if alias else "" 869 return f"{explode}{alias}" 870 871 def show_sql(self, expression: exp.Show) -> str: 872 terse = "TERSE " if expression.args.get("terse") else "" 873 history = " HISTORY" if expression.args.get("history") else "" 874 like = self.sql(expression, "like") 875 like = f" LIKE {like}" if like else "" 876 877 scope = self.sql(expression, "scope") 878 scope = f" {scope}" if scope else "" 879 880 scope_kind = self.sql(expression, "scope_kind") 881 if scope_kind: 882 scope_kind = f" IN {scope_kind}" 883 884 starts_with = self.sql(expression, "starts_with") 885 if starts_with: 886 starts_with = f" STARTS 
WITH {starts_with}" 887 888 limit = self.sql(expression, "limit") 889 890 from_ = self.sql(expression, "from") 891 if from_: 892 from_ = f" FROM {from_}" 893 894 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 895 896 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 897 # Other dialects don't support all of the following parameters, so we need to 898 # generate default values as necessary to ensure the transpilation is correct 899 group = expression.args.get("group") 900 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 901 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 902 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 903 904 return self.func( 905 "REGEXP_SUBSTR", 906 expression.this, 907 expression.expression, 908 position, 909 occurrence, 910 parameters, 911 group, 912 ) 913 914 def except_op(self, expression: exp.Except) -> str: 915 if not expression.args.get("distinct"): 916 self.unsupported("EXCEPT with All is not supported in Snowflake") 917 return super().except_op(expression) 918 919 def intersect_op(self, expression: exp.Intersect) -> str: 920 if not expression.args.get("distinct"): 921 self.unsupported("INTERSECT with All is not supported in Snowflake") 922 return super().intersect_op(expression) 923 924 def describe_sql(self, expression: exp.Describe) -> str: 925 # Default to table if kind is unknown 926 kind_value = expression.args.get("kind") or "TABLE" 927 kind = f" {kind_value}" if kind_value else "" 928 this = f" {self.sql(expression, 'this')}" 929 expressions = self.expressions(expression, flat=True) 930 expressions = f" {expressions}" if expressions else "" 931 return f"DESCRIBE{kind}{this}{expressions}" 932 933 def generatedasidentitycolumnconstraint_sql( 934 self, expression: exp.GeneratedAsIdentityColumnConstraint 935 ) -> str: 936 start = expression.args.get("start") 937 start = f" START {start}" if start else "" 938 increment = expression.args.get("increment") 939 increment = f" INCREMENT {increment}" if increment else "" 940 return f"AUTOINCREMENT{start}{increment}" 941 942 def swaptable_sql(self, expression: exp.SwapTable) -> str: 943 this = self.sql(expression, "this") 944 return f"SWAP WITH {this}" 945 946 def with_properties(self, properties: exp.Properties) -> str: 947 return self.properties(properties, wrapped=False, prefix=self.seg(""), sep=" ") 948 949 def cluster_sql(self, expression: exp.Cluster) -> str: 950 return f"CLUSTER BY ({self.expressions(expression, flat=True)})"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
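Two small end-to-end sketches of the settings above (expected shapes as comments):

    import sqlglot

    # TYPE_MAPPING renders the plain TIMESTAMP type as TIMESTAMPNTZ.
    print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP)", write="snowflake")[0])
    # Expected shape: SELECT CAST(x AS TIMESTAMPNTZ)

    # exp.If is rendered through IFF, per the TRANSFORMS entry above.
    print(sqlglot.transpile("SELECT IF(c, 1, 2)", write="snowflake")[0])
    # Expected shape: SELECT IFF(c, 1, 2)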
819 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 820 milli = expression.args.get("milli") 821 if milli is not None: 822 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 823 expression.set("nano", milli_to_nano) 824 825 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
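When a source dialect supplies a milliseconds argument (T-SQL's DATETIMEFROMPARTS is one such case, assuming it maps to exp.TimestampFromParts), it is rewritten into the nanosecond slot; a sketch:

    import sqlglot

    # The 7th T-SQL argument is milliseconds; TIMESTAMP_FROM_PARTS takes
    # nanoseconds, hence the * 1000000 rewrite performed above.
    print(sqlglot.transpile("SELECT DATETIMEFROMPARTS(2024, 1, 2, 3, 4, 5, 6)", read="tsql", write="snowflake")[0])
    # Expected shape: SELECT TIMESTAMP_FROM_PARTS(2024, 1, 2, 3, 4, 5, 6 * 1000000)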
827 def trycast_sql(self, expression: exp.TryCast) -> str: 828 value = expression.this 829 830 if value.type is None: 831 from sqlglot.optimizer.annotate_types import annotate_types 832 833 value = annotate_types(value) 834 835 if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 836 return super().trycast_sql(expression) 837 838 # TRY_CAST only works for string values in Snowflake 839 return self.cast_sql(expression)
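Concretely, a string operand keeps TRY_CAST while anything else is downgraded; a sketch:

    import sqlglot

    # A string literal is a valid TRY_CAST operand in Snowflake, so it survives.
    print(sqlglot.transpile("SELECT TRY_CAST('5' AS INT)", write="snowflake")[0])
    # Expected shape: SELECT TRY_CAST('5' AS INT)

    # A numeric operand is not, so the expression becomes a plain CAST.
    print(sqlglot.transpile("SELECT TRY_CAST(5 AS TEXT)", write="snowflake")[0])
    # Expected shape: SELECT CAST(5 AS TEXT)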
847 def unnest_sql(self, expression: exp.Unnest) -> str: 848 unnest_alias = expression.args.get("alias") 849 offset = expression.args.get("offset") 850 851 columns = [ 852 exp.to_identifier("seq"), 853 exp.to_identifier("key"), 854 exp.to_identifier("path"), 855 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 856 seq_get(unnest_alias.columns if unnest_alias else [], 0) 857 or exp.to_identifier("value"), 858 exp.to_identifier("this"), 859 ] 860 861 if unnest_alias: 862 unnest_alias.set("columns", columns) 863 else: 864 unnest_alias = exp.TableAlias(this="_u", columns=columns) 865 866 explode = f"TABLE(FLATTEN(INPUT => {self.sql(expression.expressions[0])}))" 867 alias = self.sql(unnest_alias) 868 alias = f" AS {alias}" if alias else "" 869 return f"{explode}{alias}"
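For example, a DuckDB-style UNNEST should come out as a TABLE(FLATTEN(...)) call whose alias lists FLATTEN's fixed output columns (a sketch):

    import sqlglot

    print(sqlglot.transpile("SELECT * FROM UNNEST([1, 2]) AS t(x)", read="duckdb", write="snowflake")[0])
    # Expected shape:
    # SELECT * FROM TABLE(FLATTEN(INPUT => [1, 2])) AS t(seq, key, path, index, x, this)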
871 def show_sql(self, expression: exp.Show) -> str: 872 terse = "TERSE " if expression.args.get("terse") else "" 873 history = " HISTORY" if expression.args.get("history") else "" 874 like = self.sql(expression, "like") 875 like = f" LIKE {like}" if like else "" 876 877 scope = self.sql(expression, "scope") 878 scope = f" {scope}" if scope else "" 879 880 scope_kind = self.sql(expression, "scope_kind") 881 if scope_kind: 882 scope_kind = f" IN {scope_kind}" 883 884 starts_with = self.sql(expression, "starts_with") 885 if starts_with: 886 starts_with = f" STARTS WITH {starts_with}" 887 888 limit = self.sql(expression, "limit") 889 890 from_ = self.sql(expression, "from") 891 if from_: 892 from_ = f" FROM {from_}" 893 894 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
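SHOW statements parsed by this dialect should round-trip through this method; a sketch:

    import sqlglot

    sql = "SHOW TERSE TABLES HISTORY LIKE '%foo%' IN SCHEMA db.s STARTS WITH 'a' LIMIT 10"
    # Expected to round-trip essentially unchanged.
    print(sqlglot.transpile(sql, read="snowflake", write="snowflake")[0])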
896 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 897 # Other dialects don't support all of the following parameters, so we need to 898 # generate default values as necessary to ensure the transpilation is correct 899 group = expression.args.get("group") 900 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 901 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 902 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 903 904 return self.func( 905 "REGEXP_SUBSTR", 906 expression.this, 907 expression.expression, 908 position, 909 occurrence, 910 parameters, 911 group, 912 )
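Because REGEXP_SUBSTR's group argument comes last, every preceding optional argument has to be materialized with its default; a sketch:

    import sqlglot

    # DuckDB's REGEXP_EXTRACT(a, 'pat', 2) only carries a group, so position,
    # occurrence and parameters are filled in with Snowflake's defaults.
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(a, 'pat', 2)", read="duckdb", write="snowflake")[0])
    # Expected shape: SELECT REGEXP_SUBSTR(a, 'pat', 1, 1, 'c', 2)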
924 def describe_sql(self, expression: exp.Describe) -> str: 925 # Default to table if kind is unknown 926 kind_value = expression.args.get("kind") or "TABLE" 927 kind = f" {kind_value}" if kind_value else "" 928 this = f" {self.sql(expression, 'this')}" 929 expressions = self.expressions(expression, flat=True) 930 expressions = f" {expressions}" if expressions else "" 931 return f"DESCRIBE{kind}{this}{expressions}"
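A sketch of the TABLE default:

    import sqlglot

    # No explicit kind in the input, so the generator falls back to TABLE.
    print(sqlglot.transpile("DESCRIBE db.t", read="snowflake", write="snowflake")[0])
    # Expected shape: DESCRIBE TABLE db.t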
933 def generatedasidentitycolumnconstraint_sql( 934 self, expression: exp.GeneratedAsIdentityColumnConstraint 935 ) -> str: 936 start = expression.args.get("start") 937 start = f" START {start}" if start else "" 938 increment = expression.args.get("increment") 939 increment = f" INCREMENT {increment}" if increment else "" 940 return f"AUTOINCREMENT{start}{increment}"
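For example, a PostgreSQL identity column should render through AUTOINCREMENT with its START and INCREMENT options (a sketch):

    import sqlglot

    sql = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
    print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
    # Expected shape: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)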
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql