sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    binary_from_function,
    build_default_decimal_type,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_safe_divide_sql,
    no_timestamp_sql,
    timestampdiff_sql,
    no_make_interval_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import flatten, is_float, is_int, seq_get
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind))
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

        if kind == exp.DataType.Type.DATE and not int_value:
            formatted_exp = build_formatted_time(exp.TsOrDsToDate, "snowflake")(args)
            formatted_exp.set("safe", safe)
            return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder


def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder

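# Illustrative example (editorial addition, not part of the upstream module): the
# builders above are registered by name in Snowflake.Parser.FUNCTIONS below, so a
# Snowflake call such as TO_TIMESTAMP('2020-01-01') parses into a plain cast that any
# target dialect can render, roughly:
#
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT TO_TIMESTAMP('2020-01-01')", read="snowflake").sql()
#   "SELECT CAST('2020-01-01' AS TIMESTAMP)"
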
# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc


def _unqualify_unpivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_unpivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot) and expression.unpivot:
        expression = transforms.unqualify_columns(expression)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    ).as_(sequence_value_name)

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])
    unnest.replace(exp.select(date_add).from_(unnest.copy()).subquery(unnest_alias))


def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH), the transformed Snowflake
            # query is the following (it'll be unnested properly on the next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF": "%f",
        "ff": "%f",
        "FF6": "%f",
        "ff6": "%f",
    }

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
            ),
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LISTAGG": exp.GroupConcat.from_arg_list,
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.LIKE_ANY: parser.binary_range_parser(exp.LikeAny),
            TokenType.ILIKE_ANY: parser.binary_range_parser(exp.ILikeAny),
        }

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.SHOW: lambda self: self._parse_show(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
"TAG": lambda self: self._parse_with_constraint(), 538 } 539 540 STAGED_FILE_SINGLE_TOKENS = { 541 TokenType.DOT, 542 TokenType.MOD, 543 TokenType.SLASH, 544 } 545 546 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 547 548 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 549 550 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 551 552 LAMBDAS = { 553 **parser.Parser.LAMBDAS, 554 TokenType.ARROW: lambda self, expressions: self.expression( 555 exp.Lambda, 556 this=self._replace_lambda( 557 self._parse_assignment(), 558 expressions, 559 ), 560 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 561 ), 562 } 563 564 def _negate_range( 565 self, this: t.Optional[exp.Expression] = None 566 ) -> t.Optional[exp.Expression]: 567 if not this: 568 return this 569 570 query = this.args.get("query") 571 if isinstance(this, exp.In) and isinstance(query, exp.Query): 572 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 573 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 574 # which can produce different results (most likely a SnowFlake bug). 575 # 576 # https://docs.snowflake.com/en/sql-reference/functions/in 577 # Context: https://github.com/tobymao/sqlglot/issues/3890 578 return self.expression( 579 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 580 ) 581 582 return self.expression(exp.Not, this=this) 583 584 def _parse_tag(self) -> exp.Tags: 585 return self.expression( 586 exp.Tags, 587 expressions=self._parse_wrapped_csv(self._parse_property), 588 ) 589 590 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 591 if self._prev.token_type != TokenType.WITH: 592 self._retreat(self._index - 1) 593 594 if self._match_text_seq("MASKING", "POLICY"): 595 policy = self._parse_column() 596 return self.expression( 597 exp.MaskingPolicyColumnConstraint, 598 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 599 expressions=self._match(TokenType.USING) 600 and self._parse_wrapped_csv(self._parse_id_var), 601 ) 602 if self._match_text_seq("PROJECTION", "POLICY"): 603 policy = self._parse_column() 604 return self.expression( 605 exp.ProjectionPolicyColumnConstraint, 606 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 607 ) 608 if self._match(TokenType.TAG): 609 return self._parse_tag() 610 611 return None 612 613 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 614 if self._match(TokenType.TAG): 615 return self._parse_tag() 616 617 return super()._parse_with_property() 618 619 def _parse_create(self) -> exp.Create | exp.Command: 620 expression = super()._parse_create() 621 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 622 # Replace the Table node with the enclosed Identifier 623 expression.this.replace(expression.this.this) 624 625 return expression 626 627 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 628 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 629 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 630 this = self._parse_var() or self._parse_type() 631 632 if not this: 633 return None 634 635 self._match(TokenType.COMMA) 636 expression = self._parse_bitwise() 637 this = map_date_part(this) 638 name = this.name.upper() 639 640 if name.startswith("EPOCH"): 641 if name == 
"EPOCH_MILLISECOND": 642 scale = 10**3 643 elif name == "EPOCH_MICROSECOND": 644 scale = 10**6 645 elif name == "EPOCH_NANOSECOND": 646 scale = 10**9 647 else: 648 scale = None 649 650 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 651 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 652 653 if scale: 654 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 655 656 return to_unix 657 658 return self.expression(exp.Extract, this=this, expression=expression) 659 660 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 661 if is_map: 662 # Keys are strings in Snowflake's objects, see also: 663 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 664 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 665 return self._parse_slice(self._parse_string()) 666 667 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 668 669 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 670 lateral = super()._parse_lateral() 671 if not lateral: 672 return lateral 673 674 if isinstance(lateral.this, exp.Explode): 675 table_alias = lateral.args.get("alias") 676 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 677 if table_alias and not table_alias.args.get("columns"): 678 table_alias.set("columns", columns) 679 elif not table_alias: 680 exp.alias_(lateral, "_flattened", table=columns, copy=False) 681 682 return lateral 683 684 def _parse_table_parts( 685 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 686 ) -> exp.Table: 687 # https://docs.snowflake.com/en/user-guide/querying-stage 688 if self._match(TokenType.STRING, advance=False): 689 table = self._parse_string() 690 elif self._match_text_seq("@", advance=False): 691 table = self._parse_location_path() 692 else: 693 table = None 694 695 if table: 696 file_format = None 697 pattern = None 698 699 wrapped = self._match(TokenType.L_PAREN) 700 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 701 if self._match_text_seq("FILE_FORMAT", "=>"): 702 file_format = self._parse_string() or super()._parse_table_parts( 703 is_db_reference=is_db_reference 704 ) 705 elif self._match_text_seq("PATTERN", "=>"): 706 pattern = self._parse_string() 707 else: 708 break 709 710 self._match(TokenType.COMMA) 711 712 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 713 else: 714 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 715 716 return table 717 718 def _parse_id_var( 719 self, 720 any_token: bool = True, 721 tokens: t.Optional[t.Collection[TokenType]] = None, 722 ) -> t.Optional[exp.Expression]: 723 if self._match_text_seq("IDENTIFIER", "("): 724 identifier = ( 725 super()._parse_id_var(any_token=any_token, tokens=tokens) 726 or self._parse_string() 727 ) 728 self._match_r_paren() 729 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 730 731 return super()._parse_id_var(any_token=any_token, tokens=tokens) 732 733 def _parse_show_snowflake(self, this: str) -> exp.Show: 734 scope = None 735 scope_kind = None 736 737 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 738 # which is syntactically valid but has no effect on the output 739 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 740 741 history = self._match_text_seq("HISTORY") 742 743 like = self._parse_string() if self._match(TokenType.LIKE) 

            if self._match(TokenType.IN):
                if self._match_text_seq("ACCOUNT"):
                    scope_kind = "ACCOUNT"
                elif self._match_set(self.DB_CREATABLES):
                    scope_kind = self._prev.text.upper()
                    if self._curr:
                        scope = self._parse_table_parts()
                elif self._curr:
                    scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                    scope = self._parse_table_parts()

            return self.expression(
                exp.Show,
                **{
                    "terse": terse,
                    "this": this,
                    "history": history,
                    "like": like,
                    "scope": scope,
                    "scope_kind": scope_kind,
                    "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                    "limit": self._parse_limit(),
                    "from": self._parse_string() if self._match(TokenType.FROM) else None,
                },
            )

        def _parse_location_property(self) -> exp.LocationProperty:
            self._match(TokenType.EQ)
            return self.expression(exp.LocationProperty, this=self._parse_location_path())

        def _parse_file_location(self) -> t.Optional[exp.Expression]:
            # Parse either a subquery or a staged file
            return (
                self._parse_select(table=True, parse_subquery_alias=False)
                if self._match(TokenType.L_PAREN, advance=False)
                else self._parse_table_parts()
            )

        def _parse_location_path(self) -> exp.Var:
            parts = [self._advance_any(ignore_reserved=True)]

            # We avoid consuming a comma token because external tables like @foo and @bar
            # can be joined in a query with a comma separator, as well as closing paren
            # in case of subqueries
            while self._is_connected() and not self._match_set(
                (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
            ):
                parts.append(self._advance_any(ignore_reserved=True))

            return exp.var("".join(part.text for part in parts if part))

        def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
            this = super()._parse_lambda_arg()

            if not this:
                return this

            typ = self._parse_types()

            if typ:
                return self.expression(exp.Cast, this=this, to=typ)

            return this

        def _parse_foreign_key(self) -> exp.ForeignKey:
            # inline FK: the REFERENCES columns are implied
            if self._match(TokenType.REFERENCES, advance=False):
                return self.expression(exp.ForeignKey)

            # out-of-line FK: explicitly names the columns
            return super()._parse_foreign_key()

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "CHAR VARYING": TokenType.VARCHAR,
            "CHARACTER VARYING": TokenType.VARCHAR,
            "EXCLUDE": TokenType.EXCEPT,
            "ILIKE ANY": TokenType.ILIKE_ANY,
            "LIKE ANY": TokenType.LIKE_ANY,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.COMMAND,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
            "STREAMLIT": TokenType.STREAMLIT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.Array: inline_array_sql,
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: rename_func("DATE_PART"),
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GroupConcat: rename_func("LISTAGG"),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.explode_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                ]
            ),
            exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"),
            exp.SHA: rename_func("SHA1"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.StrPosition: lambda self, e: self.func(
                "POSITION", e.args.get("substr"), e.this, e.args.get("position")
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.Stuff: rename_func("INSERT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)
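
        # Editorial note (comment added, not in the upstream source): together with the
        # _flatten_structured_types_unless_iceberg transform applied to exp.Create above,
        # a typed struct coming from another dialect is emitted as a bare OBJECT, e.g.
        # transpiling "CREATE TABLE t (c STRUCT<a INT>)" from BigQuery should yield
        # roughly "CREATE TABLE t (c OBJECT)".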

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value)

            if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
                return super().trycast_sql(expression)

            # TRY_CAST only works for string values in Snowflake
            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            unnest_alias = expression.args.get("alias")
            offset = expression.args.get("offset")

            columns = [
                exp.to_identifier("seq"),
                exp.to_identifier("key"),
                exp.to_identifier("path"),
                offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
                seq_get(unnest_alias.columns if unnest_alias else [], 0)
                or exp.to_identifier("value"),
                exp.to_identifier("this"),
            ]

            if unnest_alias:
                unnest_alias.set("columns", columns)
            else:
                unnest_alias = exp.TableAlias(this="_u", columns=columns)

            table_input = self.sql(expression.expressions[0])
            if not table_input.startswith("INPUT =>"):
                table_input = f"INPUT => {table_input}"

            explode = f"TABLE(FLATTEN({table_input}))"
            alias = self.sql(unnest_alias)
            alias = f" AS {alias}" if alias else ""
            return f"{explode}{alias}"

        def show_sql(self, expression: exp.Show) -> str:
            terse = "TERSE " if expression.args.get("terse") else ""
            history = " HISTORY" if expression.args.get("history") else ""
            like = self.sql(expression, "like")
            like = f" LIKE {like}" if like else ""

            scope = self.sql(expression, "scope")
            scope = f" {scope}" if scope else ""

            scope_kind = self.sql(expression, "scope_kind")
            if scope_kind:
                scope_kind = f" IN {scope_kind}"

            starts_with = self.sql(expression, "starts_with")
            if starts_with:
                starts_with = f" STARTS WITH {starts_with}"

            limit = self.sql(expression, "limit")

            from_ = self.sql(expression, "from")
            if from_:
                from_ = f" FROM {from_}"

            return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"

        def describe_sql(self, expression: exp.Describe) -> str:
            # Default to table if kind is unknown
            kind_value = expression.args.get("kind") or "TABLE"
            kind = f" {kind_value}" if kind_value else ""
            this = f" {self.sql(expression, 'this')}"
            expressions = self.expressions(expression, flat=True)
            expressions = f" {expressions}" if expressions else ""
            return f"DESCRIBE{kind}{this}{expressions}"

        def generatedasidentitycolumnconstraint_sql(
            self, expression: exp.GeneratedAsIdentityColumnConstraint
        ) -> str:
            start = expression.args.get("start")
            start = f" START {start}" if start else ""
            increment = expression.args.get("increment")
            increment = f" INCREMENT {increment}" if increment else ""
            return f"AUTOINCREMENT{start}{increment}"

        def cluster_sql(self, expression: exp.Cluster) -> str:
            return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

        def struct_sql(self, expression: exp.Struct) -> str:
            keys = []
            values = []

            for i, e in enumerate(expression.expressions):
                if isinstance(e, exp.PropertyEQ):
                    keys.append(
                        exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                    )
                    values.append(e.expression)
                else:
                    keys.append(exp.Literal.string(f"_{i}"))
                    values.append(e)

            return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

        @unsupported_args("weight", "accuracy")
        def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
            return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

        def alterset_sql(self, expression: exp.AlterSet) -> str:
            exprs = self.expressions(expression, flat=True)
            exprs = f" {exprs}" if exprs else ""
            file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
            file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
            copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
            copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
            tag = self.expressions(expression, key="tag", flat=True)
            tag = f" TAG {tag}" if tag else ""

            return f"SET{exprs}{file_format}{copy_options}{tag}"

        def strtotime_sql(self, expression: exp.StrToTime):
            safe_prefix = "TRY_" if expression.args.get("safe") else ""
            return self.func(
                f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
            )

        def timestampsub_sql(self, expression: exp.TimestampSub):
            return self.sql(
                exp.TimestampAdd(
                    this=expression.this,
                    expression=expression.expression * -1,
                    unit=expression.unit,
                )
            )

        def jsonextract_sql(self, expression: exp.JSONExtract):
            this = expression.this

            # JSON strings are valid coming from other dialects such as BQ
            return self.func(
                "GET_PATH",
                exp.ParseJSON(this=this) if this.is_string else this,
                expression.expression,
            )

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if not isinstance(this, exp.TsOrDsToTimestamp):
                this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

            return self.func("TO_CHAR", this, self.format_time(expression))

        def datesub_sql(self, expression: exp.DateSub) -> str:
            value = expression.expression
            if value:
                value.replace(value * (-1))
            else:
                self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

            return date_delta_sql("DATEADD")(self, expression)
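
A minimal usage sketch (editorial addition; sqlglot.parse_one and sqlglot.transpile are the library's public entry points, and this dialect is registered under the name "snowflake"):

    import sqlglot

    # Parse Snowflake SQL into sqlglot's AST with the Parser defined above
    ast = sqlglot.parse_one("SELECT IFF(x = 0, NULL, y / x) FROM t", read="snowflake")

    # Generate SQL for another engine; the Generator.TRANSFORMS table drives the rewrites
    print(sqlglot.transpile("SELECT DIV0(a, b)", read="snowflake", write="duckdb")[0])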
name="ARRAY_CAT"), 892 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 893 exp.AtTimeZone: lambda self, e: self.func( 894 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 895 ), 896 exp.BitwiseOr: rename_func("BITOR"), 897 exp.BitwiseXor: rename_func("BITXOR"), 898 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 899 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 900 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 901 exp.DateAdd: date_delta_sql("DATEADD"), 902 exp.DateDiff: date_delta_sql("DATEDIFF"), 903 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 904 exp.DatetimeDiff: timestampdiff_sql, 905 exp.DateStrToDate: datestrtodate_sql, 906 exp.DayOfMonth: rename_func("DAYOFMONTH"), 907 exp.DayOfWeek: rename_func("DAYOFWEEK"), 908 exp.DayOfYear: rename_func("DAYOFYEAR"), 909 exp.Explode: rename_func("FLATTEN"), 910 exp.Extract: rename_func("DATE_PART"), 911 exp.FromTimeZone: lambda self, e: self.func( 912 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 913 ), 914 exp.GenerateSeries: lambda self, e: self.func( 915 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 916 ), 917 exp.GroupConcat: rename_func("LISTAGG"), 918 exp.If: if_sql(name="IFF", false_value="NULL"), 919 exp.JSONExtractArray: _json_extract_value_array_sql, 920 exp.JSONExtractScalar: lambda self, e: self.func( 921 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 922 ), 923 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 924 exp.JSONPathRoot: lambda *_: "", 925 exp.JSONValueArray: _json_extract_value_array_sql, 926 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 927 exp.LogicalOr: rename_func("BOOLOR_AGG"), 928 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 929 exp.MakeInterval: no_make_interval_sql, 930 exp.Max: max_or_greatest, 931 exp.Min: min_or_least, 932 exp.ParseJSON: lambda self, e: self.func( 933 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 934 ), 935 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 936 exp.PercentileCont: transforms.preprocess( 937 [transforms.add_within_group_for_percentiles] 938 ), 939 exp.PercentileDisc: transforms.preprocess( 940 [transforms.add_within_group_for_percentiles] 941 ), 942 exp.Pivot: transforms.preprocess([_unqualify_unpivot_columns]), 943 exp.RegexpExtract: _regexpextract_sql, 944 exp.RegexpExtractAll: _regexpextract_sql, 945 exp.RegexpILike: _regexpilike_sql, 946 exp.Rand: rename_func("RANDOM"), 947 exp.Select: transforms.preprocess( 948 [ 949 transforms.eliminate_distinct_on, 950 transforms.explode_to_unnest(), 951 transforms.eliminate_semi_and_anti_joins, 952 _transform_generate_date_array, 953 ] 954 ), 955 exp.SafeDivide: lambda self, e: no_safe_divide_sql(self, e, "IFF"), 956 exp.SHA: rename_func("SHA1"), 957 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 958 exp.StartsWith: rename_func("STARTSWITH"), 959 exp.StrPosition: lambda self, e: self.func( 960 "POSITION", e.args.get("substr"), e.this, e.args.get("position") 961 ), 962 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 963 exp.Stuff: rename_func("INSERT"), 964 exp.TimeAdd: date_delta_sql("TIMEADD"), 965 exp.Timestamp: no_timestamp_sql, 966 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 967 exp.TimestampDiff: lambda self, e: self.func( 968 "TIMESTAMPDIFF", e.unit, e.expression, e.this 969 ), 970 exp.TimestampTrunc: timestamptrunc_sql(), 971 exp.TimeStrToTime: timestrtotime_sql, 
972 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 973 exp.ToArray: rename_func("TO_ARRAY"), 974 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 975 exp.ToDouble: rename_func("TO_DOUBLE"), 976 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 977 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 978 exp.TsOrDsToDate: lambda self, e: self.func( 979 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 980 ), 981 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 982 exp.Uuid: rename_func("UUID_STRING"), 983 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 984 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 985 exp.Xor: rename_func("BOOLXOR"), 986 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 987 rename_func("EDITDISTANCE") 988 ), 989 } 990 991 SUPPORTED_JSON_PATH_PARTS = { 992 exp.JSONPathKey, 993 exp.JSONPathRoot, 994 exp.JSONPathSubscript, 995 } 996 997 TYPE_MAPPING = { 998 **generator.Generator.TYPE_MAPPING, 999 exp.DataType.Type.NESTED: "OBJECT", 1000 exp.DataType.Type.STRUCT: "OBJECT", 1001 } 1002 1003 PROPERTIES_LOCATION = { 1004 **generator.Generator.PROPERTIES_LOCATION, 1005 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1006 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1007 } 1008 1009 UNSUPPORTED_VALUES_EXPRESSIONS = { 1010 exp.Map, 1011 exp.StarMap, 1012 exp.Struct, 1013 exp.VarMap, 1014 } 1015 1016 def with_properties(self, properties: exp.Properties) -> str: 1017 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1018 1019 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1020 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1021 values_as_table = False 1022 1023 return super().values_sql(expression, values_as_table=values_as_table) 1024 1025 def datatype_sql(self, expression: exp.DataType) -> str: 1026 expressions = expression.expressions 1027 if ( 1028 expressions 1029 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1030 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1031 ): 1032 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1033 return "OBJECT" 1034 1035 return super().datatype_sql(expression) 1036 1037 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1038 return self.func( 1039 "TO_NUMBER", 1040 expression.this, 1041 expression.args.get("format"), 1042 expression.args.get("precision"), 1043 expression.args.get("scale"), 1044 ) 1045 1046 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1047 milli = expression.args.get("milli") 1048 if milli is not None: 1049 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1050 expression.set("nano", milli_to_nano) 1051 1052 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1053 1054 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1055 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1056 return self.func("TO_GEOGRAPHY", expression.this) 1057 if expression.is_type(exp.DataType.Type.GEOMETRY): 1058 return self.func("TO_GEOMETRY", expression.this) 1059 1060 return super().cast_sql(expression, safe_prefix=safe_prefix) 1061 1062 def trycast_sql(self, expression: exp.TryCast) -> str: 1063 value = expression.this 1064 1065 if value.type is None: 1066 from sqlglot.optimizer.annotate_types import annotate_types 1067 1068 value = annotate_types(value) 1069 1070 if 
value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN): 1071 return super().trycast_sql(expression) 1072 1073 # TRY_CAST only works for string values in Snowflake 1074 return self.cast_sql(expression) 1075 1076 def log_sql(self, expression: exp.Log) -> str: 1077 if not expression.expression: 1078 return self.func("LN", expression.this) 1079 1080 return super().log_sql(expression) 1081 1082 def unnest_sql(self, expression: exp.Unnest) -> str: 1083 unnest_alias = expression.args.get("alias") 1084 offset = expression.args.get("offset") 1085 1086 columns = [ 1087 exp.to_identifier("seq"), 1088 exp.to_identifier("key"), 1089 exp.to_identifier("path"), 1090 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1091 seq_get(unnest_alias.columns if unnest_alias else [], 0) 1092 or exp.to_identifier("value"), 1093 exp.to_identifier("this"), 1094 ] 1095 1096 if unnest_alias: 1097 unnest_alias.set("columns", columns) 1098 else: 1099 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1100 1101 table_input = self.sql(expression.expressions[0]) 1102 if not table_input.startswith("INPUT =>"): 1103 table_input = f"INPUT => {table_input}" 1104 1105 explode = f"TABLE(FLATTEN({table_input}))" 1106 alias = self.sql(unnest_alias) 1107 alias = f" AS {alias}" if alias else "" 1108 return f"{explode}{alias}" 1109 1110 def show_sql(self, expression: exp.Show) -> str: 1111 terse = "TERSE " if expression.args.get("terse") else "" 1112 history = " HISTORY" if expression.args.get("history") else "" 1113 like = self.sql(expression, "like") 1114 like = f" LIKE {like}" if like else "" 1115 1116 scope = self.sql(expression, "scope") 1117 scope = f" {scope}" if scope else "" 1118 1119 scope_kind = self.sql(expression, "scope_kind") 1120 if scope_kind: 1121 scope_kind = f" IN {scope_kind}" 1122 1123 starts_with = self.sql(expression, "starts_with") 1124 if starts_with: 1125 starts_with = f" STARTS WITH {starts_with}" 1126 1127 limit = self.sql(expression, "limit") 1128 1129 from_ = self.sql(expression, "from") 1130 if from_: 1131 from_ = f" FROM {from_}" 1132 1133 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}" 1134 1135 def describe_sql(self, expression: exp.Describe) -> str: 1136 # Default to table if kind is unknown 1137 kind_value = expression.args.get("kind") or "TABLE" 1138 kind = f" {kind_value}" if kind_value else "" 1139 this = f" {self.sql(expression, 'this')}" 1140 expressions = self.expressions(expression, flat=True) 1141 expressions = f" {expressions}" if expressions else "" 1142 return f"DESCRIBE{kind}{this}{expressions}" 1143 1144 def generatedasidentitycolumnconstraint_sql( 1145 self, expression: exp.GeneratedAsIdentityColumnConstraint 1146 ) -> str: 1147 start = expression.args.get("start") 1148 start = f" START {start}" if start else "" 1149 increment = expression.args.get("increment") 1150 increment = f" INCREMENT {increment}" if increment else "" 1151 return f"AUTOINCREMENT{start}{increment}" 1152 1153 def cluster_sql(self, expression: exp.Cluster) -> str: 1154 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1155 1156 def struct_sql(self, expression: exp.Struct) -> str: 1157 keys = [] 1158 values = [] 1159 1160 for i, e in enumerate(expression.expressions): 1161 if isinstance(e, exp.PropertyEQ): 1162 keys.append( 1163 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1164 ) 1165 values.append(e.expression) 1166 else: 1167 
keys.append(exp.Literal.string(f"_{i}")) 1168 values.append(e) 1169 1170 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1171 1172 @unsupported_args("weight", "accuracy") 1173 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1174 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1175 1176 def alterset_sql(self, expression: exp.AlterSet) -> str: 1177 exprs = self.expressions(expression, flat=True) 1178 exprs = f" {exprs}" if exprs else "" 1179 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1180 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1181 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1182 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1183 tag = self.expressions(expression, key="tag", flat=True) 1184 tag = f" TAG {tag}" if tag else "" 1185 1186 return f"SET{exprs}{file_format}{copy_options}{tag}" 1187 1188 def strtotime_sql(self, expression: exp.StrToTime): 1189 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1190 return self.func( 1191 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1192 ) 1193 1194 def timestampsub_sql(self, expression: exp.TimestampSub): 1195 return self.sql( 1196 exp.TimestampAdd( 1197 this=expression.this, 1198 expression=expression.expression * -1, 1199 unit=expression.unit, 1200 ) 1201 ) 1202 1203 def jsonextract_sql(self, expression: exp.JSONExtract): 1204 this = expression.this 1205 1206 # JSON strings are valid coming from other dialects such as BQ 1207 return self.func( 1208 "GET_PATH", 1209 exp.ParseJSON(this=this) if this.is_string else this, 1210 expression.expression, 1211 ) 1212 1213 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1214 this = expression.this 1215 if not isinstance(this, exp.TsOrDsToTimestamp): 1216 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1217 1218 return self.func("TO_CHAR", this, self.format_time(expression)) 1219 1220 def datesub_sql(self, expression: exp.DateSub) -> str: 1221 value = expression.expression 1222 if value: 1223 value.replace(value * (-1)) 1224 else: 1225 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1226 1227 return date_delta_sql("DATEADD")(self, expression)
NORMALIZATION_STRATEGY = <NormalizationStrategy.UPPERCASE: 'uppercase'>
Specifies the strategy according to which identifiers should be normalized.
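A minimal sketch of what this means in practice, assuming a recent sqlglot where Dialect.get_or_raise returns a dialect instance: unquoted Snowflake identifiers normalize to uppercase, while quoted ones are preserved.

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    snowflake = Dialect.get_or_raise("snowflake")

    # Unquoted identifiers are case-insensitive and normalize to uppercase
    print(snowflake.normalize_identifier(exp.to_identifier("my_col")).name)  # MY_COL

    # Quoted identifiers are case-sensitive and left untouched
    print(snowflake.normalize_identifier(exp.to_identifier("my_col", quoted=True)).name)  # my_col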
NULL_ORDERING = 'nulls_are_large'
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
PREFER_CTE_ALIAS_COLUMN = True
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag causes the CTE alias columns to override any projection aliases in the subquery.
For example,

    WITH y(c) AS (
        SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
    ) SELECT c FROM y;

will be rewritten as

    WITH y(c) AS (
        SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
    ) SELECT c FROM y;
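A hedged sketch of how to observe this rewrite: it is applied during column qualification rather than plain parsing, so it can be seen through sqlglot's optimizer.qualify entry point (which also normalizes and quotes identifiers).

    from sqlglot import parse_one
    from sqlglot.optimizer.qualify import qualify

    sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"

    # The inner SUM(a) projection picks up the CTE alias c, so the HAVING reference resolves
    print(qualify(parse_one(sql, read="snowflake"), dialect="snowflake").sql("snowflake"))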
Associates this dialect's time formats with their equivalent Python strftime formats.
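For instance, Snowflake format tokens are translated through their strftime equivalents when transpiling to another dialect; a small sketch (duckdb is chosen here purely for illustration):

    import sqlglot

    # Snowflake's 'YYYY-MM-DD' maps through strftime ('%Y-%m-%d') before being
    # rendered in the target dialect's own format tokens
    print(sqlglot.transpile(
        "SELECT TO_TIMESTAMP('2024-01-01', 'YYYY-MM-DD')",
        read="snowflake",
        write="duckdb",
    )[0])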
367     def quote_identifier(self, expression: E, identify: bool = True) -> E:
368         # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
369         # unquoted DUAL keyword in a special way and does not map it to a user-defined table
370         if (
371             isinstance(expression, exp.Identifier)
372             and isinstance(expression.parent, exp.Table)
373             and expression.name.lower() == "dual"
374         ):
375             return expression  # type: ignore
376
377         return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
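A short usage sketch of the override above: ordinary identifiers are quoted on request, while DUAL inside a table reference is passed through unquoted.

    from sqlglot import parse_one

    ast = parse_one("SELECT col FROM dual", read="snowflake")

    # identify=True forces identifier quoting, but the quote_identifier override
    # leaves DUAL alone so Snowflake still recognizes the keyword
    print(ast.sql(dialect="snowflake", identify=True))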
ESCAPED_SEQUENCES
Mapping of an escaped sequence (e.g. "\\n") to its unescaped version (e.g. a literal newline).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- PRESERVE_ORIGINAL_NAMES
- LOG_BASE_FIRST
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
379     class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
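A minimal round trip through this parser, using sqlglot's public entry points:

    from sqlglot import parse_one

    # Snowflake-specific syntax parses into the generic expression tree
    ast = parse_one("SELECT TO_TIMESTAMP('2024-01-01')", read="snowflake")
    print(repr(ast))          # Select(...)
    print(ast.sql("duckdb"))  # re-rendered for another dialect, e.g. as a CAST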
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
818     class Tokenizer(tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
861     class Generator(generator.Generator):
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
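To see how a few of these options interact, here is a small sketch (the formatted output shown is indicative, not byte-exact):

import sqlglot

sql = "select a, b from my_table where a > 1"
print(sqlglot.transpile(sql, write="snowflake", pretty=True, identify=True)[0])
# With pretty=True the projections are indented according to `pad`/`indent`,
# and with identify=True every identifier is quoted, e.g.:
# SELECT
#   "a",
#   "b"
# FROM "my_table"
# WHERE
#   "a" > 1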
def datatype_sql(self, expression: exp.DataType) -> str:
    expressions = expression.expressions
    if (
        expressions
        and expression.is_type(*exp.DataType.STRUCT_TYPES)
        and any(isinstance(field_type, exp.DataType) for field_type in expressions)
    ):
        # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ]
        return "OBJECT"

    return super().datatype_sql(expression)
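For example, a struct type whose fields are bare types (no key names) cannot be expressed with Snowflake's key/value OBJECT syntax, so the type parameters are dropped. A sketch, assuming BigQuery as the reading dialect; output is indicative:

import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS STRUCT<INT64>)", read="bigquery", write="snowflake")[0])
# expected along the lines of: SELECT CAST(x AS OBJECT)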
def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
    milli = expression.args.get("milli")
    if milli is not None:
        milli_to_nano = milli.pop() * exp.Literal.number(1000000)
        expression.set("nano", milli_to_nano)

    return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
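A minimal sketch of the milli-to-nano rewrite, building the expression by hand (the argument names follow exp.TimestampFromParts; the rendered string is indicative):

from sqlglot import exp

ts = exp.TimestampFromParts(
    year=exp.Literal.number(2024),
    month=exp.Literal.number(1),
    day=exp.Literal.number(1),
    hour=exp.Literal.number(12),
    min=exp.Literal.number(0),
    sec=exp.Literal.number(0),
    milli=exp.Literal.number(500),  # gets popped and rewritten into a `nano` argument
)
print(ts.sql(dialect="snowflake"))
# expected along the lines of: TIMESTAMP_FROM_PARTS(2024, 1, 1, 12, 0, 0, 500 * 1000000)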
def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
    if expression.is_type(exp.DataType.Type.GEOGRAPHY):
        return self.func("TO_GEOGRAPHY", expression.this)
    if expression.is_type(exp.DataType.Type.GEOMETRY):
        return self.func("TO_GEOMETRY", expression.this)

    return super().cast_sql(expression, safe_prefix=safe_prefix)
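So casts to the spatial types become conversion function calls rather than CAST syntax. A sketch (indicative output):

import sqlglot

print(sqlglot.transpile("SELECT CAST(col AS GEOGRAPHY)", read="bigquery", write="snowflake")[0])
# expected along the lines of: SELECT TO_GEOGRAPHY(col)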
def trycast_sql(self, expression: exp.TryCast) -> str:
    value = expression.this

    if value.type is None:
        from sqlglot.optimizer.annotate_types import annotate_types

        value = annotate_types(value)

    if value.is_type(*exp.DataType.TEXT_TYPES, exp.DataType.Type.UNKNOWN):
        return super().trycast_sql(expression)

    # TRY_CAST only works for string values in Snowflake
    return self.cast_sql(expression)
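In other words, TRY_CAST is only emitted when the operand could plausibly be a string; otherwise a plain CAST is produced. A sketch (outputs indicative):

import sqlglot

# Operand of unknown (possibly string) type: TRY_CAST is preserved.
print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="snowflake", write="snowflake")[0])
# expected along the lines of: SELECT TRY_CAST(x AS INT)

# Known non-string operand: TRY_CAST would be invalid in Snowflake, so CAST is emitted.
print(sqlglot.transpile("SELECT TRY_CAST(10 AS TEXT)", read="snowflake", write="snowflake")[0])
# expected along the lines of: SELECT CAST(10 AS TEXT)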
def unnest_sql(self, expression: exp.Unnest) -> str:
    unnest_alias = expression.args.get("alias")
    offset = expression.args.get("offset")

    columns = [
        exp.to_identifier("seq"),
        exp.to_identifier("key"),
        exp.to_identifier("path"),
        offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
        seq_get(unnest_alias.columns if unnest_alias else [], 0)
        or exp.to_identifier("value"),
        exp.to_identifier("this"),
    ]

    if unnest_alias:
        unnest_alias.set("columns", columns)
    else:
        unnest_alias = exp.TableAlias(this="_u", columns=columns)

    table_input = self.sql(expression.expressions[0])
    if not table_input.startswith("INPUT =>"):
        table_input = f"INPUT => {table_input}"

    explode = f"TABLE(FLATTEN({table_input}))"
    alias = self.sql(unnest_alias)
    alias = f" AS {alias}" if alias else ""
    return f"{explode}{alias}"
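A sketch of the FLATTEN rewrite; the fixed alias columns mirror the output columns of Snowflake's FLATTEN table function (indicative output):

import sqlglot

print(sqlglot.transpile("SELECT value FROM UNNEST(col)", read="bigquery", write="snowflake")[0])
# expected along the lines of:
# SELECT value FROM TABLE(FLATTEN(INPUT => col)) AS _u(seq, key, path, index, value, this)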
def show_sql(self, expression: exp.Show) -> str:
    terse = "TERSE " if expression.args.get("terse") else ""
    history = " HISTORY" if expression.args.get("history") else ""
    like = self.sql(expression, "like")
    like = f" LIKE {like}" if like else ""

    scope = self.sql(expression, "scope")
    scope = f" {scope}" if scope else ""

    scope_kind = self.sql(expression, "scope_kind")
    if scope_kind:
        scope_kind = f" IN {scope_kind}"

    starts_with = self.sql(expression, "starts_with")
    if starts_with:
        starts_with = f" STARTS WITH {starts_with}"

    limit = self.sql(expression, "limit")

    from_ = self.sql(expression, "from")
    if from_:
        from_ = f" FROM {from_}"

    return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}"
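A round-trip sketch, assuming the Snowflake parser supports this SHOW variant (recent sqlglot versions do; output indicative):

import sqlglot

ast = sqlglot.parse_one("SHOW TERSE TABLES LIKE '%foo%'", read="snowflake")
print(ast.sql(dialect="snowflake"))
# expected along the lines of: SHOW TERSE TABLES LIKE '%foo%'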
def describe_sql(self, expression: exp.Describe) -> str:
    # Default to table if kind is unknown
    kind_value = expression.args.get("kind") or "TABLE"
    kind = f" {kind_value}" if kind_value else ""
    this = f" {self.sql(expression, 'this')}"
    expressions = self.expressions(expression, flat=True)
    expressions = f" {expressions}" if expressions else ""
    return f"DESCRIBE{kind}{this}{expressions}"
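Because of the TABLE default, a bare DESCRIBE should gain an explicit kind on the way out. A sketch (indicative):

import sqlglot

print(sqlglot.transpile("DESCRIBE db.tbl", read="snowflake", write="snowflake")[0])
# expected along the lines of: DESCRIBE TABLE db.tbl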
def generatedasidentitycolumnconstraint_sql(
    self, expression: exp.GeneratedAsIdentityColumnConstraint
) -> str:
    start = expression.args.get("start")
    start = f" START {start}" if start else ""
    increment = expression.args.get("increment")
    increment = f" INCREMENT {increment}" if increment else ""
    return f"AUTOINCREMENT{start}{increment}"
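So a standard identity column from, say, Postgres maps onto Snowflake's AUTOINCREMENT syntax. A sketch (indicative):

import sqlglot

ddl = "CREATE TABLE t (id INT GENERATED BY DEFAULT AS IDENTITY (START WITH 1 INCREMENT BY 2))"
print(sqlglot.transpile(ddl, read="postgres", write="snowflake")[0])
# expected along the lines of: CREATE TABLE t (id INT AUTOINCREMENT START 1 INCREMENT 2)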
def struct_sql(self, expression: exp.Struct) -> str:
    keys = []
    values = []

    for i, e in enumerate(expression.expressions):
        if isinstance(e, exp.PropertyEQ):
            keys.append(
                exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
            )
            values.append(e.expression)
        else:
            keys.append(exp.Literal.string(f"_{i}"))
            values.append(e)

    return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
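For example, a DuckDB struct literal becomes OBJECT_CONSTRUCT with alternating keys and values; unnamed fields get positional "_0", "_1", ... keys. A sketch (indicative):

import sqlglot

print(sqlglot.transpile("SELECT {'a': 1, 'b': x}", read="duckdb", write="snowflake")[0])
# expected along the lines of: SELECT OBJECT_CONSTRUCT('a', 1, 'b', x)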
def alterset_sql(self, expression: exp.AlterSet) -> str:
    exprs = self.expressions(expression, flat=True)
    exprs = f" {exprs}" if exprs else ""
    file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
    file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
    copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
    copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
    tag = self.expressions(expression, key="tag", flat=True)
    tag = f" TAG {tag}" if tag else ""

    return f"SET{exprs}{file_format}{copy_options}{tag}"
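A round-trip sketch for the tag branch, assuming the Snowflake parser accepts this ALTER ... SET TAG form (recent sqlglot versions do; output indicative):

import sqlglot

print(sqlglot.transpile("ALTER TABLE t SET TAG dept = 'sales'", read="snowflake", write="snowflake")[0])
# expected along the lines of: ALTER TABLE t SET TAG dept = 'sales'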
def datesub_sql(self, expression: exp.DateSub) -> str:
    value = expression.expression
    if value:
        value.replace(value * (-1))
    else:
        self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

    return date_delta_sql("DATEADD")(self, expression)
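Since Snowflake has no date-subtraction function, the count is negated and DATEADD is emitted; note the negation stays symbolic rather than being folded into a literal. A sketch (indicative):

import sqlglot

print(sqlglot.transpile("SELECT DATE_SUB(d, INTERVAL 3 DAY)", read="mysql", write="snowflake")[0])
# expected along the lines of: SELECT DATEADD(DAY, 3 * -1, d)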
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql