sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    timestrtotime_sql,
    ts_or_ds_add_cast,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = alias.columns if alias else (f"_c{i}" for i in range(len(tup.expressions)))
        expressions = [exp.alias_(fld, name) for fld, name in zip(tup.expressions, field_aliases)]
        structs.append(exp.Struct(expressions=expressions))

    return self.unnest_sql(exp.Unnest(expressions=[exp.array(*structs, copy=False)]))


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


def _unqualify_unnest(expression: exp.Expression) -> exp.Expression:
    """Remove references to unnest table aliases since bigquery doesn't allow them.

    These are added by the optimizer's qualify_column step.
    """
    from sqlglot.optimizer.scope import find_all_in_scope

    if isinstance(expression, exp.Select):
        unnest_aliases = {
            unnest.alias
            for unnest in find_all_in_scope(expression, exp.Unnest)
            if isinstance(unnest.parent, (exp.From, exp.Join))
        }
        if unnest_aliases:
            for column in expression.find_all(exp.Column):
                if column.table in unnest_aliases:
                    column.set("table", None)
                elif column.db in unnest_aliases:
                    column.set("db", None)

    return expression


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.Hex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, "TIMESTAMP", copy=True))
    expression.expression.replace(exp.cast(expression.expression, "TIMESTAMP", copy=True))
    unit = expression.args.get("unit") or "DAY"
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), "int64")
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 3:
        return exp.TimeFromParts.from_arg_list(args)

    return exp.Anonymous(this="TIME", expressions=args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    TIME_MAPPING = {
        "%D": "%m/%d/%y",
    }

    ESCAPE_SEQUENCES = {
        "\\a": "\a",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
        "\\v": "\v",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if isinstance(expression, exp.Identifier):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs aren't case-sensitive, but table names are (by default, at least).
            # The following check is essentially a heuristic to detect tables based on whether or
            # not they're qualified. It also avoids normalizing UDFs, because they're case-sensitive.
            if (
                not isinstance(parent, exp.UserDefinedFunction)
                and not (isinstance(parent, exp.Table) and parent.db)
                and not expression.meta.get("is_table")
            ):
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DECLARE": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True

        LOG_DEFAULTS_TO_LN = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    self._advance(2)
                    table_name += f"-{self._prev.text}"

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            if isinstance(table.this, exp.Identifier) and "." in table.name:
                catalog, db, this, *rest = (
                    t.cast(t.Optional[exp.Expression], exp.to_identifier(x))
                    for x in split_num_words(table.name, ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build(t.cast(t.List[exp.Expression], [this, *rest]))

                table = exp.Table(this=this, db=db, catalog=catalog)

            return table

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject:
            ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg:
            ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_UNION = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        NAMED_PLACEHOLDER_TOKEN = "@"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: rename_func("TO_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    _unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA2: lambda self, e: self.func(
                "SHA256" if e.text("length") == "256" else "SHA512", e.this
            ),
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            *generator.Generator.RESERVED_KEYWORDS,
            "all",
            "and",
            "any",
            "array",
            "as",
            "asc",
            "assert_rows_modified",
            "at",
            "between",
            "by",
            "case",
            "cast",
            "collate",
            "contains",
            "create",
            "cross",
            "cube",
            "current",
            "default",
            "define",
            "desc",
            "distinct",
            "else",
            "end",
            "enum",
            "escape",
            "except",
            "exclude",
            "exists",
            "extract",
            "false",
            "fetch",
            "following",
            "for",
            "from",
            "full",
            "group",
            "grouping",
            "groups",
            "hash",
            "having",
            "if",
            "ignore",
            "in",
            "inner",
            "intersect",
            "interval",
            "into",
            "is",
            "join",
            "lateral",
            "left",
            "like",
            "limit",
            "lookup",
            "merge",
            "natural",
            "new",
            "no",
            "not",
            "null",
            "nulls",
            "of",
            "on",
            "or",
            "order",
            "outer",
            "over",
            "partition",
            "preceding",
            "proto",
            "qualify",
            "range",
            "recursive",
            "respect",
            "right",
            "rollup",
            "rows",
            "select",
            "set",
            "some",
            "struct",
            "tablesample",
            "then",
            "to",
            "treat",
            "true",
            "unbounded",
            "union",
            "unnest",
            "using",
            "when",
            "where",
            "window",
            "with",
            "within",
        }

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def struct_sql(self, expression: exp.Struct) -> str:
            args = []
            for expr in expression.expressions:
                if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                    arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
                else:
                    arg = self.sql(expr)

                args.append(arg)

            return self.func("STRUCT", *args)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def array_sql(self, expression: exp.Array) -> str:
            first_arg = seq_get(expression.expressions, 0)
            if isinstance(first_arg, exp.Subqueryable):
                return f"ARRAY{self.wrap(self.sql(first_arg))}"

            return inline_array_sql(self, expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = self.sql(expression, "this")
            expressions = expression.expressions

            if len(expressions) == 1:
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values
                    return f"{this}.{arg.name}"

            expressions_sql = ", ".join(self.sql(e) for e in expressions)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{this}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("OPTIONS"))

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
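A quick way to exercise this dialect is through sqlglot's top-level API. A minimal sketch; the printed SQL is indicative and may vary slightly across sqlglot versions:

    import sqlglot

    # TO_HEX(MD5(..)) is collapsed into a single exp.MD5 node on parse
    # (see _build_to_hex above) and expanded back out by the exp.MD5 transform.
    print(sqlglot.transpile("SELECT TO_HEX(MD5('x'))", read="bigquery", write="bigquery")[0])

    # Reading BigQuery and writing another dialect exercises the TRANSFORMS and
    # TYPE_MAPPING tables, e.g. TIMESTAMP_SECONDS(..) round-trips through exp.UnixToTime.
    print(sqlglot.transpile("SELECT TIMESTAMP_SECONDS(1)", read="bigquery", write="duckdb")[0])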
class BigQuery(Dialect):
WEEK_OFFSET: First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday); -1 would be Sunday.
NORMALIZATION_STRATEGY: Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS: Determines how function names are going to be normalized.
Possible values:
- "upper" or True: Convert names to uppercase.
- "lower": Convert names to lowercase.
- False: Disables function name normalization.
TIME_MAPPING: Associates this dialect's time formats with their equivalent Python strftime formats.
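For instance, the single entry defined above maps BigQuery's %D token to the strftime sequence %m/%d/%y; the mapping can be inspected directly:

    from sqlglot.dialects.dialect import Dialect

    bigquery = Dialect.get_or_raise("bigquery")
    print(bigquery.TIME_MAPPING["%D"])  # %m/%d/%y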
ESCAPE_SEQUENCES: Mapping of an unescaped escape sequence to the corresponding character.
FORMAT_MAPPING: Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
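A sketch of how these format tokens line up, using only dictionary access on the values listed in the source above:

    from sqlglot.dialects.bigquery import BigQuery

    # Each CAST ... FORMAT token maps onto a strftime directive.
    assert BigQuery.FORMAT_MAPPING["YYYY"] == "%Y"
    assert BigQuery.FORMAT_MAPPING["HH24"] == "%H"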
PSEUDOCOLUMNS: Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.

For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system, for example they may always be case-sensitive in Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
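The behavior described above can be observed directly. A minimal sketch; the commented results follow from the rules stated in this docstring:

    from sqlglot import exp
    from sqlglot.dialects.dialect import Dialect

    # BigQuery resolves unquoted, unqualified identifiers case-insensitively,
    # so they are lowercased.
    print(Dialect.get_or_raise("bigquery").normalize_identifier(exp.to_identifier("FoO")).name)
    # foo

    # A quoted identifier in Postgres is case-sensitive and is left untouched.
    print(Dialect.get_or_raise("postgres").normalize_identifier(exp.to_identifier("FoO", quoted=True)).name)
    # FoO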
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- get_or_raise
- format_time
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
class BigQuery.Tokenizer(tokens.Tokenizer):
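A sketch of the tokenizer's BigQuery-specific behavior, such as backtick-quoted identifiers and '#' line comments (both configured in the source above):

    from sqlglot.dialects.dialect import Dialect

    # tokenize() returns the raw token stream produced by this tokenizer.
    for token in Dialect.get_or_raise("bigquery").tokenize("SELECT `col` # comment"):
        print(token.token_type, token.text)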
class BigQuery.Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
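As a sketch of how these options interact (the malformed query below is arbitrary), error_level=ErrorLevel.RAISE collects up to max_errors parse errors before raising:

import sqlglot
from sqlglot.errors import ErrorLevel, ParseError

try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.RAISE)
except ParseError as e:
    # e.errors holds structured error dicts; their number is capped by max_errors
    print(len(e.errors), e.errors[0]["description"])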
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_UNION
- UNION_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
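These inherited entry points can also be driven directly through the dialect object; a minimal sketch using tokenize and parse:

from sqlglot.dialects.bigquery import BigQuery

dialect = BigQuery()
tokens = dialect.tokenize("SELECT 1 AS x")
(ast,) = dialect.parser().parse(tokens)
print(ast.sql(dialect="bigquery"))  # SELECT 1 AS x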
class Generator(generator.Generator):
    EXPLICIT_UNION = True
    INTERVAL_ALLOWS_PLURAL_FORM = False
    JOIN_HINTS = False
    QUERY_HINTS = False
    TABLE_HINTS = False
    LIMIT_FETCH = "LIMIT"
    RENAME_TABLE_WITH_DB = False
    NVL2_SUPPORTED = False
    UNNEST_WITH_ORDINALITY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    SUPPORTS_TABLE_ALIAS_COLUMNS = False
    UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
    JSON_KEY_VALUE_PAIR_SEP = ","
    NULL_ORDERING_SUPPORTED = False
    IGNORE_NULLS_IN_FUNC = True
    JSON_PATH_SINGLE_QUOTE_ESCAPE = True
    CAN_IMPLEMENT_ARRAY_ANY = True
    NAMED_PLACEHOLDER_TOKEN = "@"

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayContains: _array_contains_sql,
        exp.ArrayFilter: filter_array_using_unnest,
        exp.ArraySize: rename_func("ARRAY_LENGTH"),
        exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
        exp.CollateProperty: lambda self, e: (
            f"DEFAULT COLLATE {self.sql(e, 'this')}"
            if e.args.get("default")
            else f"COLLATE {self.sql(e, 'this')}"
        ),
        exp.Commit: lambda *_: "COMMIT TRANSACTION",
        exp.CountIf: rename_func("COUNTIF"),
        exp.Create: _create_sql,
        exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
        exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
        exp.DateDiff: lambda self, e: self.func(
            "DATE_DIFF", e.this, e.expression, e.unit or "DAY"
        ),
        exp.DateFromParts: rename_func("DATE"),
        exp.DateStrToDate: datestrtodate_sql,
        exp.DateSub: date_add_interval_sql("DATE", "SUB"),
        exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
        exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
        exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
        exp.FromTimeZone: lambda self, e: self.func(
            "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
        ),
        exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
        exp.GroupConcat: rename_func("STRING_AGG"),
        exp.Hex: rename_func("TO_HEX"),
        exp.If: if_sql(false_value="NULL"),
        exp.ILike: no_ilike_sql,
        exp.IntDiv: rename_func("DIV"),
        exp.JSONFormat: rename_func("TO_JSON_STRING"),
        exp.Max: max_or_greatest,
        exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
        exp.MD5Digest: rename_func("MD5"),
        exp.Min: min_or_least,
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.RegexpExtract: lambda self, e: self.func(
            "REGEXP_EXTRACT",
            e.this,
            e.expression,
            e.args.get("position"),
            e.args.get("occurrence"),
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_to_unnest(),
                _unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA2: lambda self, e: self.func(
            "SHA256" if e.text("length") == "256" else "SHA512", e.this
        ),
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
        exp.StrToTime: lambda self, e: self.func(
            "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
        ),
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        *generator.Generator.RESERVED_KEYWORDS,
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
        "outer", "over", "partition", "preceding", "proto", "qualify", "range",
        "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
        "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
        return self.func("FORMAT_DATE", self.format_time(expression), this.this)

    def struct_sql(self, expression: exp.Struct) -> str:
        args = []
        for expr in expression.expressions:
            if isinstance(expr, self.KEY_VALUE_DEFINITIONS):
                arg = f"{self.sql(expr, 'expression')} AS {expr.this.name}"
            else:
                arg = self.sql(expr)

            args.append(arg)

        return self.func("STRUCT", *args)

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def array_sql(self, expression: exp.Array) -> str:
        first_arg = seq_get(expression.expressions, 0)
        if isinstance(first_arg, exp.Subqueryable):
            return f"ARRAY{self.wrap(self.sql(first_arg))}"

        return inline_array_sql(self, expression)

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = self.sql(expression, "this")
        expressions = expression.expressions

        if len(expressions) == 1:
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values
                return f"{this}.{arg.name}"

        expressions_sql = ", ".join(self.sql(e) for e in expressions)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{this}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def except_op(self, expression: exp.Except) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
        return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def intersect_op(self, expression: exp.Intersect) -> str:
        if not expression.args.get("distinct"):
            self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
        return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("OPTIONS"))

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)
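Several of these TRANSFORMS entries can be observed by transpiling into BigQuery; a small sketch (inputs chosen arbitrarily):

import sqlglot

# exp.ILike: no_ilike_sql -- BigQuery lacks ILIKE, so it is rewritten around LIKE
print(sqlglot.transpile("SELECT * FROM t WHERE x ILIKE '%a%'", write="bigquery")[0])

# exp.ArraySize: rename_func("ARRAY_LENGTH")
print(sqlglot.transpile("SELECT ARRAY_SIZE(arr)", write="bigquery")[0])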
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. Default: 2.
- indent: The indentation size in a formatted string. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
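A sketch of these options in use, driving this generator through the top-level API:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT a, b FROM t WHERE a > 1",
        write="bigquery",
        pretty=True,    # formatted, multi-line output
        identify=True,  # quote all identifiers
    )[0]
)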
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- KEY_VALUE_DEFINITIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- renamecolumn_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- arrayany_sql