sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


def _build_regexp_extract(args: t.List) -> exp.RegexpExtract:
    try:
        group = re.compile(args[1].name).groups == 1
    except re.error:
        group = False

    return exp.RegexpExtract(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position=seq_get(args, 2),
        occurrence=seq_get(args, 3),
        group=exp.Literal.number(1) if group else None,
    )


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
        "%e": "%-d",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    # All set operations require either a DISTINCT or ALL specifier
    SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None)

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract,
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.UUID: "STRING",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # WINDOW comes after QUALIFY
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
        AFTER_HAVING_MODIFIER_TRANSFORMS = {
            "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
            "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            if isinstance(expression.this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_DATETIME"
            else:
                func_name = "FORMAT_DATE"
            this = (
                expression.this
                if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(func_name, self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
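The dialect is normally exercised through sqlglot's top-level API rather than instantiated directly. A minimal usage sketch (the printed output is illustrative and may differ slightly across sqlglot versions):

    import sqlglot

    # "bigquery" resolves to the BigQuery dialect defined above. TO_HEX(MD5(..))
    # is parsed into a single MD5 node (see _build_to_hex) so it can transpile
    # to engines whose MD5 function already returns a hex string.
    sql = "SELECT TO_HEX(MD5(name)) FROM `my-project.dataset.users`"
    print(sqlglot.transpile(sql, read="bigquery", write="duckdb")[0])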
class BigQuery(Dialect):
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1

In most dialects, "my_id" would refer to "data.my_id" across the query, except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
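For illustration, here is a minimal sketch using sqlglot's qualify helper to observe how BigQuery resolves these aliases (the exact qualified output may vary between sqlglot versions):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id
    FROM data
    WHERE my_id = 1
    GROUP BY my_id
    HAVING my_id = 1
    """

    # Under BigQuery's rules, my_id in GROUP BY and HAVING resolves to the
    # projection alias, i.e. back to the underlying "id" column
    print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))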
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
297 def normalize_identifier(self, expression: E) -> E: 298 if ( 299 isinstance(expression, exp.Identifier) 300 and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE 301 ): 302 parent = expression.parent 303 while isinstance(parent, exp.Dot): 304 parent = parent.parent 305 306 # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive 307 # by default. The following check uses a heuristic to detect tables based on whether 308 # they are qualified. This should generally be correct, because tables in BigQuery 309 # must be qualified with at least a dataset, unless @@dataset_id is set. 310 case_sensitive = ( 311 isinstance(parent, exp.UserDefinedFunction) 312 or ( 313 isinstance(parent, exp.Table) 314 and parent.db 315 and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column")) 316 ) 317 or expression.meta.get("is_table") 318 ) 319 if not case_sensitive: 320 expression.set("this", expression.this.lower()) 321 322 return expression
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.

There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.

Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.

SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
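A small sketch of how this plays out for BigQuery, where bare identifiers (e.g. CTE names) are treated as case-insensitive and therefore lowercased, per the heuristic in the method above:

    from sqlglot import exp
    from sqlglot.dialects import BigQuery

    dialect = BigQuery()

    # An unqualified identifier has no Table/UDF parent, so it is
    # considered case-insensitive and gets lowercased
    print(dialect.normalize_identifier(exp.to_identifier("MyCte")).name)  # mycte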
Mapping of an escaped sequence (e.g. \n) to its unescaped version (e.g. a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
324 class Tokenizer(tokens.Tokenizer): 325 QUOTES = ["'", '"', '"""', "'''"] 326 COMMENTS = ["--", "#", ("/*", "*/")] 327 IDENTIFIERS = ["`"] 328 STRING_ESCAPES = ["\\"] 329 330 HEX_STRINGS = [("0x", ""), ("0X", "")] 331 332 BYTE_STRINGS = [ 333 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 334 ] 335 336 RAW_STRINGS = [ 337 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 338 ] 339 340 KEYWORDS = { 341 **tokens.Tokenizer.KEYWORDS, 342 "ANY TYPE": TokenType.VARIANT, 343 "BEGIN": TokenType.COMMAND, 344 "BEGIN TRANSACTION": TokenType.BEGIN, 345 "BYTEINT": TokenType.INT, 346 "BYTES": TokenType.BINARY, 347 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 348 "DATETIME": TokenType.TIMESTAMP, 349 "DECLARE": TokenType.COMMAND, 350 "ELSEIF": TokenType.COMMAND, 351 "EXCEPTION": TokenType.COMMAND, 352 "FLOAT64": TokenType.DOUBLE, 353 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 354 "MODEL": TokenType.MODEL, 355 "NOT DETERMINISTIC": TokenType.VOLATILE, 356 "RECORD": TokenType.STRUCT, 357 "TIMESTAMP": TokenType.TIMESTAMPTZ, 358 } 359 KEYWORDS.pop("DIV") 360 KEYWORDS.pop("VALUES") 361 KEYWORDS.pop("/*+")
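A quick sketch exercising the tokenizer's BigQuery-specific rules above (backtick identifiers, byte and raw string prefixes); the table name is a placeholder:

    from sqlglot.dialects import BigQuery

    sql = "SELECT b'bytes', r'raw' FROM `my-project.ds.t`"
    for token in BigQuery().tokenize(sql):
        print(token.token_type, repr(token.text))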
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
363 class Parser(parser.Parser): 364 PREFIXED_PIVOT_COLUMNS = True 365 LOG_DEFAULTS_TO_LN = True 366 SUPPORTS_IMPLICIT_UNNEST = True 367 368 FUNCTIONS = { 369 **parser.Parser.FUNCTIONS, 370 "DATE": _build_date, 371 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 372 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 373 "DATE_TRUNC": lambda args: exp.DateTrunc( 374 unit=exp.Literal.string(str(seq_get(args, 1))), 375 this=seq_get(args, 0), 376 ), 377 "DATETIME": _build_datetime, 378 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 379 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 380 "DIV": binary_from_function(exp.IntDiv), 381 "FORMAT_DATE": lambda args: exp.TimeToStr( 382 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 383 ), 384 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 385 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 386 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 387 ), 388 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 389 "MD5": exp.MD5Digest.from_arg_list, 390 "TO_HEX": _build_to_hex, 391 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 392 [seq_get(args, 1), seq_get(args, 0)] 393 ), 394 "PARSE_TIMESTAMP": _build_parse_timestamp, 395 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 396 "REGEXP_EXTRACT": _build_regexp_extract, 397 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 398 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 399 "SPLIT": lambda args: exp.Split( 400 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 401 this=seq_get(args, 0), 402 expression=seq_get(args, 1) or exp.Literal.string(","), 403 ), 404 "TIME": _build_time, 405 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 406 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 407 "TIMESTAMP": _build_timestamp, 408 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 409 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 410 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 411 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 412 ), 413 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 414 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 415 ), 416 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 417 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 418 "FORMAT_DATETIME": lambda args: exp.TimeToStr( 419 this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0) 420 ), 421 } 422 423 FUNCTION_PARSERS = { 424 **parser.Parser.FUNCTION_PARSERS, 425 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 426 } 427 FUNCTION_PARSERS.pop("TRIM") 428 429 NO_PAREN_FUNCTIONS = { 430 **parser.Parser.NO_PAREN_FUNCTIONS, 431 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 432 } 433 434 NESTED_TYPE_TOKENS = { 435 *parser.Parser.NESTED_TYPE_TOKENS, 436 TokenType.TABLE, 437 } 438 439 PROPERTY_PARSERS = { 440 **parser.Parser.PROPERTY_PARSERS, 441 "NOT DETERMINISTIC": lambda self: self.expression( 442 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 443 ), 444 "OPTIONS": lambda self: self._parse_with_property(), 445 } 446 447 CONSTRAINT_PARSERS = { 448 **parser.Parser.CONSTRAINT_PARSERS, 449 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 450 } 451 452 RANGE_PARSERS = 
parser.Parser.RANGE_PARSERS.copy() 453 RANGE_PARSERS.pop(TokenType.OVERLAPS) 454 455 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 456 457 STATEMENT_PARSERS = { 458 **parser.Parser.STATEMENT_PARSERS, 459 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 460 TokenType.END: lambda self: self._parse_as_command(self._prev), 461 TokenType.FOR: lambda self: self._parse_for_in(), 462 } 463 464 BRACKET_OFFSETS = { 465 "OFFSET": (0, False), 466 "ORDINAL": (1, False), 467 "SAFE_OFFSET": (0, True), 468 "SAFE_ORDINAL": (1, True), 469 } 470 471 def _parse_for_in(self) -> exp.ForIn: 472 this = self._parse_range() 473 self._match_text_seq("DO") 474 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 475 476 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 477 this = super()._parse_table_part(schema=schema) or self._parse_number() 478 479 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 480 if isinstance(this, exp.Identifier): 481 table_name = this.name 482 while self._match(TokenType.DASH, advance=False) and self._next: 483 text = "" 484 while self._curr and self._curr.token_type != TokenType.DOT: 485 self._advance() 486 text += self._prev.text 487 table_name += text 488 489 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 490 elif isinstance(this, exp.Literal): 491 table_name = this.name 492 493 if self._is_connected() and self._parse_var(any_token=True): 494 table_name += self._prev.text 495 496 this = exp.Identifier(this=table_name, quoted=True) 497 498 return this 499 500 def _parse_table_parts( 501 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 502 ) -> exp.Table: 503 table = super()._parse_table_parts( 504 schema=schema, is_db_reference=is_db_reference, wildcard=True 505 ) 506 507 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 508 if not table.catalog: 509 if table.db: 510 parts = table.db.split(".") 511 if len(parts) == 2 and not table.args["db"].quoted: 512 table.set("catalog", exp.Identifier(this=parts[0])) 513 table.set("db", exp.Identifier(this=parts[1])) 514 else: 515 parts = table.name.split(".") 516 if len(parts) == 2 and not table.this.quoted: 517 table.set("db", exp.Identifier(this=parts[0])) 518 table.set("this", exp.Identifier(this=parts[1])) 519 520 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 521 catalog, db, this, *rest = ( 522 exp.to_identifier(p, quoted=True) 523 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 524 ) 525 526 if rest and this: 527 this = exp.Dot.build([this, *rest]) # type: ignore 528 529 table = exp.Table( 530 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 531 ) 532 table.meta["quoted_table"] = True 533 534 return table 535 536 def _parse_column(self) -> t.Optional[exp.Expression]: 537 column = super()._parse_column() 538 if isinstance(column, exp.Column): 539 parts = column.parts 540 if any("." in p.name for p in parts): 541 catalog, db, table, this, *rest = ( 542 exp.to_identifier(p, quoted=True) 543 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 544 ) 545 546 if rest and this: 547 this = exp.Dot.build([this, *rest]) # type: ignore 548 549 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 550 column.meta["quoted_column"] = True 551 552 return column 553 554 @t.overload 555 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
556 557 @t.overload 558 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 559 560 def _parse_json_object(self, agg=False): 561 json_object = super()._parse_json_object() 562 array_kv_pair = seq_get(json_object.expressions, 0) 563 564 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 565 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 566 if ( 567 array_kv_pair 568 and isinstance(array_kv_pair.this, exp.Array) 569 and isinstance(array_kv_pair.expression, exp.Array) 570 ): 571 keys = array_kv_pair.this.expressions 572 values = array_kv_pair.expression.expressions 573 574 json_object.set( 575 "expressions", 576 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 577 ) 578 579 return json_object 580 581 def _parse_bracket( 582 self, this: t.Optional[exp.Expression] = None 583 ) -> t.Optional[exp.Expression]: 584 bracket = super()._parse_bracket(this) 585 586 if this is bracket: 587 return bracket 588 589 if isinstance(bracket, exp.Bracket): 590 for expression in bracket.expressions: 591 name = expression.name.upper() 592 593 if name not in self.BRACKET_OFFSETS: 594 break 595 596 offset, safe = self.BRACKET_OFFSETS[name] 597 bracket.set("offset", offset) 598 bracket.set("safe", safe) 599 expression.replace(expression.expressions[0]) 600 601 return bracket 602 603 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 604 unnest = super()._parse_unnest(with_alias=with_alias) 605 606 if not unnest: 607 return None 608 609 unnest_expr = seq_get(unnest.expressions, 0) 610 if unnest_expr: 611 from sqlglot.optimizer.annotate_types import annotate_types 612 613 unnest_expr = annotate_types(unnest_expr) 614 615 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 616 # in contrast to other dialects such as DuckDB which flattens only the array by default 617 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 618 array_elem.is_type(exp.DataType.Type.STRUCT) 619 for array_elem in unnest_expr._type.expressions 620 ): 621 unnest.set("explode_array", True) 622 623 return unnest
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
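A sketch of two of the BigQuery-specific parsing behaviors implemented above, dash-separated table names and the OFFSET/ORDINAL bracket helpers (table and column names are placeholders):

    import sqlglot

    # Dashes in unquoted table paths are folded into a single identifier
    print(sqlglot.parse_one("SELECT * FROM my-project.ds.tbl", read="bigquery").sql("bigquery"))

    # SAFE_OFFSET(...) is parsed into a Bracket node carrying offset/safe args
    bracket = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery").find(sqlglot.exp.Bracket)
    print(bracket.args.get("offset"), bracket.args.get("safe"))  # 0 True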
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
625 class Generator(generator.Generator): 626 INTERVAL_ALLOWS_PLURAL_FORM = False 627 JOIN_HINTS = False 628 QUERY_HINTS = False 629 TABLE_HINTS = False 630 LIMIT_FETCH = "LIMIT" 631 RENAME_TABLE_WITH_DB = False 632 NVL2_SUPPORTED = False 633 UNNEST_WITH_ORDINALITY = False 634 COLLATE_IS_FUNC = True 635 LIMIT_ONLY_LITERALS = True 636 SUPPORTS_TABLE_ALIAS_COLUMNS = False 637 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 638 JSON_KEY_VALUE_PAIR_SEP = "," 639 NULL_ORDERING_SUPPORTED = False 640 IGNORE_NULLS_IN_FUNC = True 641 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 642 CAN_IMPLEMENT_ARRAY_ANY = True 643 SUPPORTS_TO_NUMBER = False 644 NAMED_PLACEHOLDER_TOKEN = "@" 645 HEX_FUNC = "TO_HEX" 646 WITH_PROPERTIES_PREFIX = "OPTIONS" 647 SUPPORTS_EXPLODING_PROJECTIONS = False 648 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 649 650 TRANSFORMS = { 651 **generator.Generator.TRANSFORMS, 652 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 653 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 654 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 655 exp.Array: inline_array_unless_query, 656 exp.ArrayContains: _array_contains_sql, 657 exp.ArrayFilter: filter_array_using_unnest, 658 exp.ArraySize: rename_func("ARRAY_LENGTH"), 659 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 660 exp.CollateProperty: lambda self, e: ( 661 f"DEFAULT COLLATE {self.sql(e, 'this')}" 662 if e.args.get("default") 663 else f"COLLATE {self.sql(e, 'this')}" 664 ), 665 exp.Commit: lambda *_: "COMMIT TRANSACTION", 666 exp.CountIf: rename_func("COUNTIF"), 667 exp.Create: _create_sql, 668 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 669 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 670 exp.DateDiff: lambda self, e: self.func( 671 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 672 ), 673 exp.DateFromParts: rename_func("DATE"), 674 exp.DateStrToDate: datestrtodate_sql, 675 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 676 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 677 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 678 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 679 exp.FromTimeZone: lambda self, e: self.func( 680 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 681 ), 682 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 683 exp.GroupConcat: rename_func("STRING_AGG"), 684 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 685 exp.If: if_sql(false_value="NULL"), 686 exp.ILike: no_ilike_sql, 687 exp.IntDiv: rename_func("DIV"), 688 exp.JSONFormat: rename_func("TO_JSON_STRING"), 689 exp.Max: max_or_greatest, 690 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 691 exp.MD5Digest: rename_func("MD5"), 692 exp.Min: min_or_least, 693 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 694 exp.RegexpExtract: lambda self, e: self.func( 695 "REGEXP_EXTRACT", 696 e.this, 697 e.expression, 698 e.args.get("position"), 699 e.args.get("occurrence"), 700 ), 701 exp.RegexpReplace: regexp_replace_sql, 702 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 703 exp.ReturnsProperty: _returnsproperty_sql, 704 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 705 exp.Select: transforms.preprocess( 706 [ 707 transforms.explode_to_unnest(), 708 transforms.unqualify_unnest, 709 transforms.eliminate_distinct_on, 710 _alias_ordered_group, 711 transforms.eliminate_semi_and_anti_joins, 712 ] 713 ), 714 exp.SHA: rename_func("SHA1"), 715 
exp.SHA2: sha256_sql, 716 exp.StabilityProperty: lambda self, e: ( 717 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 718 ), 719 exp.StrToDate: _str_to_datetime_sql, 720 exp.StrToTime: _str_to_datetime_sql, 721 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 722 exp.TimeFromParts: rename_func("TIME"), 723 exp.TimestampFromParts: rename_func("DATETIME"), 724 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 725 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 726 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 727 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 728 exp.TimeStrToTime: timestrtotime_sql, 729 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 730 exp.TsOrDsAdd: _ts_or_ds_add_sql, 731 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 732 exp.TsOrDsToTime: rename_func("TIME"), 733 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 734 exp.Unhex: rename_func("FROM_HEX"), 735 exp.UnixDate: rename_func("UNIX_DATE"), 736 exp.UnixToTime: _unix_to_time_sql, 737 exp.Uuid: lambda *_: "GENERATE_UUID()", 738 exp.Values: _derived_table_values_to_unnest, 739 exp.VariancePop: rename_func("VAR_POP"), 740 } 741 742 SUPPORTED_JSON_PATH_PARTS = { 743 exp.JSONPathKey, 744 exp.JSONPathRoot, 745 exp.JSONPathSubscript, 746 } 747 748 TYPE_MAPPING = { 749 **generator.Generator.TYPE_MAPPING, 750 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 751 exp.DataType.Type.BIGINT: "INT64", 752 exp.DataType.Type.BINARY: "BYTES", 753 exp.DataType.Type.BOOLEAN: "BOOL", 754 exp.DataType.Type.CHAR: "STRING", 755 exp.DataType.Type.DECIMAL: "NUMERIC", 756 exp.DataType.Type.DOUBLE: "FLOAT64", 757 exp.DataType.Type.FLOAT: "FLOAT64", 758 exp.DataType.Type.INT: "INT64", 759 exp.DataType.Type.NCHAR: "STRING", 760 exp.DataType.Type.NVARCHAR: "STRING", 761 exp.DataType.Type.SMALLINT: "INT64", 762 exp.DataType.Type.TEXT: "STRING", 763 exp.DataType.Type.TIMESTAMP: "DATETIME", 764 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 765 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 766 exp.DataType.Type.TINYINT: "INT64", 767 exp.DataType.Type.ROWVERSION: "BYTES", 768 exp.DataType.Type.UUID: "STRING", 769 exp.DataType.Type.VARBINARY: "BYTES", 770 exp.DataType.Type.VARCHAR: "STRING", 771 exp.DataType.Type.VARIANT: "ANY TYPE", 772 } 773 774 PROPERTIES_LOCATION = { 775 **generator.Generator.PROPERTIES_LOCATION, 776 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 777 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 778 } 779 780 # WINDOW comes after QUALIFY 781 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 782 AFTER_HAVING_MODIFIER_TRANSFORMS = { 783 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 784 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 785 } 786 787 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 788 RESERVED_KEYWORDS = { 789 "all", 790 "and", 791 "any", 792 "array", 793 "as", 794 "asc", 795 "assert_rows_modified", 796 "at", 797 "between", 798 "by", 799 "case", 800 "cast", 801 "collate", 802 "contains", 803 "create", 804 "cross", 805 "cube", 806 "current", 807 "default", 808 "define", 809 "desc", 810 "distinct", 811 "else", 812 "end", 813 "enum", 814 "escape", 815 "except", 816 "exclude", 817 "exists", 818 "extract", 819 "false", 820 "fetch", 821 "following", 822 "for", 823 "from", 824 "full", 825 "group", 826 "grouping", 827 "groups", 828 "hash", 829 "having", 830 "if", 831 "ignore", 832 "in", 833 "inner", 834 "intersect", 835 
"interval", 836 "into", 837 "is", 838 "join", 839 "lateral", 840 "left", 841 "like", 842 "limit", 843 "lookup", 844 "merge", 845 "natural", 846 "new", 847 "no", 848 "not", 849 "null", 850 "nulls", 851 "of", 852 "on", 853 "or", 854 "order", 855 "outer", 856 "over", 857 "partition", 858 "preceding", 859 "proto", 860 "qualify", 861 "range", 862 "recursive", 863 "respect", 864 "right", 865 "rollup", 866 "rows", 867 "select", 868 "set", 869 "some", 870 "struct", 871 "tablesample", 872 "then", 873 "to", 874 "treat", 875 "true", 876 "unbounded", 877 "union", 878 "unnest", 879 "using", 880 "when", 881 "where", 882 "window", 883 "with", 884 "within", 885 } 886 887 def mod_sql(self, expression: exp.Mod) -> str: 888 this = expression.this 889 expr = expression.expression 890 return self.func( 891 "MOD", 892 this.unnest() if isinstance(this, exp.Paren) else this, 893 expr.unnest() if isinstance(expr, exp.Paren) else expr, 894 ) 895 896 def column_parts(self, expression: exp.Column) -> str: 897 if expression.meta.get("quoted_column"): 898 # If a column reference is of the form `dataset.table`.name, we need 899 # to preserve the quoted table path, otherwise the reference breaks 900 table_parts = ".".join(p.name for p in expression.parts[:-1]) 901 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 902 return f"{table_path}.{self.sql(expression, 'this')}" 903 904 return super().column_parts(expression) 905 906 def table_parts(self, expression: exp.Table) -> str: 907 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 908 # we need to make sure the correct quoting is used in each case. 909 # 910 # For example, if there is a CTE x that clashes with a schema name, then the former will 911 # return the table y in that schema, whereas the latter will return the CTE's y column: 912 # 913 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 914 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 915 if expression.meta.get("quoted_table"): 916 table_parts = ".".join(p.name for p in expression.parts) 917 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 918 919 return super().table_parts(expression) 920 921 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 922 if isinstance(expression.this, exp.TsOrDsToTimestamp): 923 func_name = "FORMAT_DATETIME" 924 else: 925 func_name = "FORMAT_DATE" 926 this = ( 927 expression.this 928 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 929 else expression 930 ) 931 return self.func(func_name, self.format_time(expression), this.this) 932 933 def eq_sql(self, expression: exp.EQ) -> str: 934 # Operands of = cannot be NULL in BigQuery 935 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 936 if not isinstance(expression.parent, exp.Update): 937 return "NULL" 938 939 return self.binary(expression, "=") 940 941 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 942 parent = expression.parent 943 944 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 945 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
946 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 947 return self.func( 948 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 949 ) 950 951 return super().attimezone_sql(expression) 952 953 def trycast_sql(self, expression: exp.TryCast) -> str: 954 return self.cast_sql(expression, safe_prefix="SAFE_") 955 956 def bracket_sql(self, expression: exp.Bracket) -> str: 957 this = expression.this 958 expressions = expression.expressions 959 960 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 961 arg = expressions[0] 962 if arg.type is None: 963 from sqlglot.optimizer.annotate_types import annotate_types 964 965 arg = annotate_types(arg) 966 967 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 968 # BQ doesn't support bracket syntax with string values for structs 969 return f"{self.sql(this)}.{arg.name}" 970 971 expressions_sql = self.expressions(expression, flat=True) 972 offset = expression.args.get("offset") 973 974 if offset == 0: 975 expressions_sql = f"OFFSET({expressions_sql})" 976 elif offset == 1: 977 expressions_sql = f"ORDINAL({expressions_sql})" 978 elif offset is not None: 979 self.unsupported(f"Unsupported array offset: {offset}") 980 981 if expression.args.get("safe"): 982 expressions_sql = f"SAFE_{expressions_sql}" 983 984 return f"{self.sql(this)}[{expressions_sql}]" 985 986 def in_unnest_op(self, expression: exp.Unnest) -> str: 987 return self.sql(expression) 988 989 def version_sql(self, expression: exp.Version) -> str: 990 if expression.name == "TIMESTAMP": 991 expression.set("this", "SYSTEM_TIME") 992 return super().version_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
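An illustrative transpilation exercising a few of these settings (the TYPE_MAPPING to FLOAT64 and the MOD rewrite; the exact output shape may differ slightly across sqlglot versions):

    import sqlglot

    sql = "SELECT CAST(a AS DOUBLE), b % 2 FROM t"
    print(sqlglot.transpile(sql, read="duckdb", write="bigquery")[0])
    # e.g. SELECT CAST(a AS FLOAT64), MOD(b, 2) FROM t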
896 def column_parts(self, expression: exp.Column) -> str: 897 if expression.meta.get("quoted_column"): 898 # If a column reference is of the form `dataset.table`.name, we need 899 # to preserve the quoted table path, otherwise the reference breaks 900 table_parts = ".".join(p.name for p in expression.parts[:-1]) 901 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 902 return f"{table_path}.{self.sql(expression, 'this')}" 903 904 return super().column_parts(expression)
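For example, a quoted table path in a column reference should survive a BigQuery round-trip unchanged (a sketch; `dataset.table` is a placeholder path):

    import sqlglot

    sql = "SELECT `dataset.table`.col FROM `dataset.table`"
    print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])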
906 def table_parts(self, expression: exp.Table) -> str: 907 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 908 # we need to make sure the correct quoting is used in each case. 909 # 910 # For example, if there is a CTE x that clashes with a schema name, then the former will 911 # return the table y in that schema, whereas the latter will return the CTE's y column: 912 # 913 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 914 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 915 if expression.meta.get("quoted_table"): 916 table_parts = ".".join(p.name for p in expression.parts) 917 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 918 919 return super().table_parts(expression)
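The two spellings from the comment above should be kept distinct on a round-trip (sketch):

    import sqlglot

    for sql in (
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`",
        "WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y`",
    ):
        print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])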
921 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 922 if isinstance(expression.this, exp.TsOrDsToTimestamp): 923 func_name = "FORMAT_DATETIME" 924 else: 925 func_name = "FORMAT_DATE" 926 this = ( 927 expression.this 928 if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 929 else expression 930 ) 931 return self.func(func_name, self.format_time(expression), this.this)
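A sketch of the dispatch: FORMAT_DATETIME is emitted for timestamp-producing inputs, FORMAT_DATE otherwise (d and ts are placeholder columns):

    import sqlglot

    print(sqlglot.transpile("SELECT FORMAT_DATE('%Y', d)", read="bigquery", write="bigquery")[0])
    print(sqlglot.transpile("SELECT FORMAT_DATETIME('%Y', ts)", read="bigquery", write="bigquery")[0])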
941 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 942 parent = expression.parent 943 944 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 945 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 946 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 947 return self.func( 948 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 949 ) 950 951 return super().attimezone_sql(expression)
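For instance, a Postgres AT TIME ZONE expression should be rewritten through the TIMESTAMP(DATETIME(...)) form (sketch; ts is a placeholder column):

    import sqlglot

    sql = "SELECT ts AT TIME ZONE 'America/New_York'"
    print(sqlglot.transpile(sql, read="postgres", write="bigquery")[0])
    # e.g. SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))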
956 def bracket_sql(self, expression: exp.Bracket) -> str: 957 this = expression.this 958 expressions = expression.expressions 959 960 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 961 arg = expressions[0] 962 if arg.type is None: 963 from sqlglot.optimizer.annotate_types import annotate_types 964 965 arg = annotate_types(arg) 966 967 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 968 # BQ doesn't support bracket syntax with string values for structs 969 return f"{self.sql(this)}.{arg.name}" 970 971 expressions_sql = self.expressions(expression, flat=True) 972 offset = expression.args.get("offset") 973 974 if offset == 0: 975 expressions_sql = f"OFFSET({expressions_sql})" 976 elif offset == 1: 977 expressions_sql = f"ORDINAL({expressions_sql})" 978 elif offset is not None: 979 self.unsupported(f"Unsupported array offset: {offset}") 980 981 if expression.args.get("safe"): 982 expressions_sql = f"SAFE_{expressions_sql}" 983 984 return f"{self.sql(this)}[{expressions_sql}]"
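Round-tripping the positional helpers shows how the offset/safe args are re-rendered (sketch):

    import sqlglot

    for sql in (
        "SELECT arr[OFFSET(0)] FROM t",
        "SELECT arr[ORDINAL(1)] FROM t",
        "SELECT arr[SAFE_OFFSET(0)] FROM t",
    ):
        print(sqlglot.transpile(sql, read="bigquery", write="bigquery")[0])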
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql