sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
    explode_to_unnest_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType
from sqlglot.parser import binary_range_parser

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to invert
    # the sign of the result, so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if:
    #  1. The STRUCT itself does not have proper fields (key := value) as a "proper" STRUCT would
    #  2. A cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_bq_inline_struct = (
        (expression.find(exp.PropertyEQ) is None)
        and ancestor_cast
        and any(
            casted_type.is_type(exp.DataType.Type.STRUCT)
            for casted_type in ancestor_cast.find_all(exp.DataType)
        )
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_bq_inline_struct:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_bq_inline_struct else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


def _json_extract_value_array_sql(
    self: DuckDB.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    data_type = "ARRAY<STRING>" if isinstance(expression, exp.JSONValueArray) else "ARRAY<JSON>"
    return self.sql(exp.cast(json_extract, to=exp.DataType.build(data_type)))


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "^@": TokenType.CARET_AT,
            "@>": TokenType.AT_GT,
            "<@": TokenType.LT_AT,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        RANGE_PARSERS = {
            **parser.Parser.RANGE_PARSERS,
            TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps),
            TokenType.CARET_AT: binary_range_parser(exp.StartsWith),
        }

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
            "EDITDIST3": exp.Levenshtein.from_arg_list,
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False
        ARRAY_SIZE_DIM_REQUIRED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONBExists: rename_func("JSON_EXISTS"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Lateral: explode_to_unnest_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")(
                rename_func("LEVENSHTEIN")
            ),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty, e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake,
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)

        @unsupported_args("position", "occurrence")
        def regexpextract_sql(self, expression: exp.RegexpExtract) -> str:
            group = expression.args.get("group")
            params = expression.args.get("parameters")

            # Do not render group if there is no following argument,
            # and it's the default value for this dialect
            if (
                not params
                and group
                and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP)
            ):
                group = None
            return self.func(
                "REGEXP_EXTRACT", expression.this, expression.expression, group, params
            )
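In practice the dialect is exercised through sqlglot's top-level API rather than used directly. A minimal sketch of both directions (the table and column names are illustrative):

import sqlglot

# Parse DuckDB SQL into sqlglot's AST via this dialect's Tokenizer/Parser.
ast = sqlglot.parse_one("SELECT STR_SPLIT(name, ',') FROM people", read="duckdb")

# Generate SQL for another dialect; the Generator's TRANSFORMS drive the rewrites.
print(ast.sql(dialect="spark"))

# Or transpile in one step; writing BigQuery's DATE_DIFF to DuckDB exercises
# _date_diff_sql above, which flips the operand order.
print(sqlglot.transpile("SELECT DATE_DIFF(a, b, DAY)", read="bigquery", write="duckdb")[0])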
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array definitions/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
288 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 289 if isinstance(path, exp.Literal): 290 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 291 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 292 # This check ensures we'll avoid trying to parse these as JSON paths, which can 293 # either result in a noisy warning or in an invalid representation of the path. 294 path_text = path.name 295 if path_text.startswith("/") or "[#" in path_text: 296 return path 297 298 return super().to_json_path(path)
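A hedged round-trip sketch of the check above (the table and column names are illustrative): a path literal that starts with `/` uses DuckDB's JSON pointer syntax and is returned untouched, while a `$.`-style literal goes through the normal JSONPath handling in super().to_json_path.

    import sqlglot

    # JSONPath-style literal: parsed and re-rendered as a JSON path
    print(sqlglot.transpile("SELECT j -> '$.a' FROM t", read="duckdb", write="duckdb")[0])

    # JSON pointer literal: starts with '/', so to_json_path returns it as-is
    print(sqlglot.transpile("SELECT j -> '/a/0' FROM t", read="duckdb", write="duckdb")[0])

    # Both statements should round-trip essentially unchanged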
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
300 class Tokenizer(tokens.Tokenizer): 301 HEREDOC_STRINGS = ["$"] 302 303 HEREDOC_TAG_IS_IDENTIFIER = True 304 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 305 306 KEYWORDS = { 307 **tokens.Tokenizer.KEYWORDS, 308 "//": TokenType.DIV, 309 "**": TokenType.DSTAR, 310 "^@": TokenType.CARET_AT, 311 "@>": TokenType.AT_GT, 312 "<@": TokenType.LT_AT, 313 "ATTACH": TokenType.COMMAND, 314 "BINARY": TokenType.VARBINARY, 315 "BITSTRING": TokenType.BIT, 316 "BPCHAR": TokenType.TEXT, 317 "CHAR": TokenType.TEXT, 318 "CHARACTER VARYING": TokenType.TEXT, 319 "EXCLUDE": TokenType.EXCEPT, 320 "LOGICAL": TokenType.BOOLEAN, 321 "ONLY": TokenType.ONLY, 322 "PIVOT_WIDER": TokenType.PIVOT, 323 "POSITIONAL": TokenType.POSITIONAL, 324 "SIGNED": TokenType.INT, 325 "STRING": TokenType.TEXT, 326 "SUMMARIZE": TokenType.SUMMARIZE, 327 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 328 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 329 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 330 "TIMESTAMP_US": TokenType.TIMESTAMP, 331 "UBIGINT": TokenType.UBIGINT, 332 "UINTEGER": TokenType.UINT, 333 "USMALLINT": TokenType.USMALLINT, 334 "UTINYINT": TokenType.UTINYINT, 335 "VARCHAR": TokenType.TEXT, 336 } 337 KEYWORDS.pop("/*+") 338 339 SINGLE_TOKENS = { 340 **tokens.Tokenizer.SINGLE_TOKENS, 341 "$": TokenType.PARAMETER, 342 }
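A small sketch of the operator remappings above (output shapes are indicative, not guaranteed): `//` tokenizes as integer division, `**` as exponentiation, and `$` as a parameter token.

    import sqlglot

    # Inspect the DuckDB-specific tokens produced by the Tokenizer above
    for token in sqlglot.tokenize("SELECT 7 // 2, 2 ** 10, $1", read="duckdb"):
        print(token.token_type, repr(token.text))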
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
344 class Parser(parser.Parser): 345 BITWISE = { 346 **parser.Parser.BITWISE, 347 TokenType.TILDA: exp.RegexpLike, 348 } 349 BITWISE.pop(TokenType.CARET) 350 351 RANGE_PARSERS = { 352 **parser.Parser.RANGE_PARSERS, 353 TokenType.DAMP: binary_range_parser(exp.ArrayOverlaps), 354 TokenType.CARET_AT: binary_range_parser(exp.StartsWith), 355 } 356 357 EXPONENT = { 358 **parser.Parser.EXPONENT, 359 TokenType.CARET: exp.Pow, 360 TokenType.DSTAR: exp.Pow, 361 } 362 363 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 364 365 FUNCTIONS = { 366 **parser.Parser.FUNCTIONS, 367 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 368 "ARRAY_SORT": exp.SortArray.from_arg_list, 369 "DATEDIFF": _build_date_diff, 370 "DATE_DIFF": _build_date_diff, 371 "DATE_TRUNC": date_trunc_to_time, 372 "DATETRUNC": date_trunc_to_time, 373 "DECODE": lambda args: exp.Decode( 374 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 375 ), 376 "ENCODE": lambda args: exp.Encode( 377 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 378 ), 379 "EPOCH": exp.TimeToUnix.from_arg_list, 380 "EPOCH_MS": lambda args: exp.UnixToTime( 381 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 382 ), 383 "JSON": exp.ParseJSON.from_arg_list, 384 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 385 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 386 "LIST_HAS": exp.ArrayContains.from_arg_list, 387 "LIST_REVERSE_SORT": _build_sort_array_desc, 388 "LIST_SORT": exp.SortArray.from_arg_list, 389 "LIST_VALUE": lambda args: exp.Array(expressions=args), 390 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 391 "MAKE_TIMESTAMP": _build_make_timestamp, 392 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 393 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 394 "REGEXP_EXTRACT": build_regexp_extract(exp.RegexpExtract), 395 "REGEXP_EXTRACT_ALL": build_regexp_extract(exp.RegexpExtractAll), 396 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 397 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 398 this=seq_get(args, 0), 399 expression=seq_get(args, 1), 400 replacement=seq_get(args, 2), 401 modifiers=seq_get(args, 3), 402 ), 403 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 404 "STRING_SPLIT": exp.Split.from_arg_list, 405 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 406 "STRING_TO_ARRAY": exp.Split.from_arg_list, 407 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 408 "STRUCT_PACK": exp.Struct.from_arg_list, 409 "STR_SPLIT": exp.Split.from_arg_list, 410 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 411 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 412 "UNNEST": exp.Explode.from_arg_list, 413 "XOR": binary_from_function(exp.BitwiseXor), 414 "GENERATE_SERIES": _build_generate_series(), 415 "RANGE": _build_generate_series(end_exclusive=True), 416 "EDITDIST3": exp.Levenshtein.from_arg_list, 417 } 418 419 FUNCTIONS.pop("DATE_SUB") 420 FUNCTIONS.pop("GLOB") 421 422 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 423 FUNCTION_PARSERS.pop("DECODE") 424 425 NO_PAREN_FUNCTION_PARSERS = { 426 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 427 "MAP": lambda self: self._parse_map(), 428 } 429 430 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 431 TokenType.SEMI, 432 TokenType.ANTI, 433 } 434 435 PLACEHOLDER_PARSERS = { 436 **parser.Parser.PLACEHOLDER_PARSERS, 437 TokenType.PARAMETER: lambda self: ( 438 self.expression(exp.Placeholder, this=self._prev.text) 439 if self._match(TokenType.NUMBER) or 
self._match_set(self.ID_VAR_TOKENS) 440 else None 441 ), 442 } 443 444 TYPE_CONVERTERS = { 445 # https://duckdb.org/docs/sql/data_types/numeric 446 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3), 447 # https://duckdb.org/docs/sql/data_types/text 448 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 449 } 450 451 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 452 # https://duckdb.org/docs/sql/samples.html 453 sample = super()._parse_table_sample(as_modifier=as_modifier) 454 if sample and not sample.args.get("method"): 455 if sample.args.get("size"): 456 sample.set("method", exp.var("RESERVOIR")) 457 else: 458 sample.set("method", exp.var("SYSTEM")) 459 460 return sample 461 462 def _parse_bracket( 463 self, this: t.Optional[exp.Expression] = None 464 ) -> t.Optional[exp.Expression]: 465 bracket = super()._parse_bracket(this) 466 if isinstance(bracket, exp.Bracket): 467 bracket.set("returns_list_for_maps", True) 468 469 return bracket 470 471 def _parse_map(self) -> exp.ToMap | exp.Map: 472 if self._match(TokenType.L_BRACE, advance=False): 473 return self.expression(exp.ToMap, this=self._parse_bracket()) 474 475 args = self._parse_wrapped_csv(self._parse_assignment) 476 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 477 478 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 479 return self._parse_field_def() 480 481 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 482 if len(aggregations) == 1: 483 return super()._pivot_column_names(aggregations) 484 return pivot_column_names(aggregations, dialect="duckdb")
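A hedged sketch exercising two of the parser hooks above: _parse_map handles DuckDB's brace syntax for map literals, and PLACEHOLDER_PARSERS turns `$1`/`$name` into placeholders. The AST node checks are assumptions inferred from the code shown here.

    import sqlglot
    from sqlglot import exp

    # MAP {'a': 1} is routed through _parse_map (brace branch -> exp.ToMap)
    ast = sqlglot.parse_one("SELECT MAP {'a': 1}", read="duckdb")
    print(ast.find(exp.ToMap) is not None)  # expected: True

    # $1 hits PLACEHOLDER_PARSERS[TokenType.PARAMETER]
    ast = sqlglot.parse_one("SELECT $1", read="duckdb")
    print(ast.find(exp.Placeholder) is not None)  # expected: True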
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
486 class Generator(generator.Generator): 487 PARAMETER_TOKEN = "$" 488 NAMED_PLACEHOLDER_TOKEN = "$" 489 JOIN_HINTS = False 490 TABLE_HINTS = False 491 QUERY_HINTS = False 492 LIMIT_FETCH = "LIMIT" 493 STRUCT_DELIMITER = ("(", ")") 494 RENAME_TABLE_WITH_DB = False 495 NVL2_SUPPORTED = False 496 SEMI_ANTI_JOIN_WITH_SIDE = False 497 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 498 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 499 LAST_DAY_SUPPORTS_DATE_PART = False 500 JSON_KEY_VALUE_PAIR_SEP = "," 501 IGNORE_NULLS_IN_FUNC = True 502 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 503 SUPPORTS_CREATE_TABLE_LIKE = False 504 MULTI_ARG_DISTINCT = False 505 CAN_IMPLEMENT_ARRAY_ANY = True 506 SUPPORTS_TO_NUMBER = False 507 COPY_HAS_INTO_KEYWORD = False 508 STAR_EXCEPT = "EXCLUDE" 509 PAD_FILL_PATTERN_IS_REQUIRED = True 510 ARRAY_CONCAT_IS_VAR_LEN = False 511 ARRAY_SIZE_DIM_REQUIRED = False 512 513 TRANSFORMS = { 514 **generator.Generator.TRANSFORMS, 515 exp.ApproxDistinct: approx_count_distinct_sql, 516 exp.Array: inline_array_unless_query, 517 exp.ArrayFilter: rename_func("LIST_FILTER"), 518 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 519 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 520 exp.ArraySort: _array_sort_sql, 521 exp.ArraySum: rename_func("LIST_SUM"), 522 exp.BitwiseXor: rename_func("XOR"), 523 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 524 exp.CurrentDate: lambda *_: "CURRENT_DATE", 525 exp.CurrentTime: lambda *_: "CURRENT_TIME", 526 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 527 exp.DayOfMonth: rename_func("DAYOFMONTH"), 528 exp.DayOfWeek: rename_func("DAYOFWEEK"), 529 exp.DayOfWeekIso: rename_func("ISODOW"), 530 exp.DayOfYear: rename_func("DAYOFYEAR"), 531 exp.DataType: _datatype_sql, 532 exp.Date: _date_sql, 533 exp.DateAdd: _date_delta_sql, 534 exp.DateFromParts: rename_func("MAKE_DATE"), 535 exp.DateSub: _date_delta_sql, 536 exp.DateDiff: _date_diff_sql, 537 exp.DateStrToDate: datestrtodate_sql, 538 exp.Datetime: no_datetime_sql, 539 exp.DatetimeSub: _date_delta_sql, 540 exp.DatetimeAdd: _date_delta_sql, 541 exp.DateToDi: lambda self, 542 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 543 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 544 exp.DiToDate: lambda self, 545 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 546 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 547 exp.GenerateDateArray: _generate_datetime_array_sql, 548 exp.GenerateTimestampArray: _generate_datetime_array_sql, 549 exp.Explode: rename_func("UNNEST"), 550 exp.IntDiv: lambda self, e: self.binary(e, "//"), 551 exp.IsInf: rename_func("ISINF"), 552 exp.IsNan: rename_func("ISNAN"), 553 exp.JSONBExists: rename_func("JSON_EXISTS"), 554 exp.JSONExtract: _arrow_json_extract_sql, 555 exp.JSONExtractArray: _json_extract_value_array_sql, 556 exp.JSONExtractScalar: _arrow_json_extract_sql, 557 exp.JSONFormat: _json_format_sql, 558 exp.JSONValueArray: _json_extract_value_array_sql, 559 exp.Lateral: explode_to_unnest_sql, 560 exp.LogicalOr: rename_func("BOOL_OR"), 561 exp.LogicalAnd: rename_func("BOOL_AND"), 562 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 563 exp.MonthsBetween: lambda self, e: self.func( 564 "DATEDIFF", 565 "'month'", 566 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 567 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 568 ), 569 exp.PercentileCont: rename_func("QUANTILE_CONT"), 570 
exp.PercentileDisc: rename_func("QUANTILE_DISC"), 571 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 572 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 573 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 574 exp.RegexpReplace: lambda self, e: self.func( 575 "REGEXP_REPLACE", 576 e.this, 577 e.expression, 578 e.args.get("replacement"), 579 e.args.get("modifiers"), 580 ), 581 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 582 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 583 exp.Return: lambda self, e: self.sql(e, "this"), 584 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 585 exp.Rand: rename_func("RANDOM"), 586 exp.SafeDivide: no_safe_divide_sql, 587 exp.SHA: rename_func("SHA1"), 588 exp.SHA2: sha256_sql, 589 exp.Split: rename_func("STR_SPLIT"), 590 exp.SortArray: _sort_array_sql, 591 exp.StrPosition: str_position_sql, 592 exp.StrToUnix: lambda self, e: self.func( 593 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 594 ), 595 exp.Struct: _struct_sql, 596 exp.Transform: rename_func("LIST_TRANSFORM"), 597 exp.TimeAdd: _date_delta_sql, 598 exp.Time: no_time_sql, 599 exp.TimeDiff: _timediff_sql, 600 exp.Timestamp: no_timestamp_sql, 601 exp.TimestampDiff: lambda self, e: self.func( 602 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 603 ), 604 exp.TimestampTrunc: timestamptrunc_sql(), 605 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 606 exp.TimeStrToTime: timestrtotime_sql, 607 exp.TimeStrToUnix: lambda self, e: self.func( 608 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 609 ), 610 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 611 exp.TimeToUnix: rename_func("EPOCH"), 612 exp.TsOrDiToDi: lambda self, 613 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 614 exp.TsOrDsAdd: _date_delta_sql, 615 exp.TsOrDsDiff: lambda self, e: self.func( 616 "DATE_DIFF", 617 f"'{e.args.get('unit') or 'DAY'}'", 618 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 619 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 620 ), 621 exp.UnixToStr: lambda self, e: self.func( 622 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 623 ), 624 exp.DatetimeTrunc: lambda self, e: self.func( 625 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 626 ), 627 exp.UnixToTime: _unix_to_time_sql, 628 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 629 exp.VariancePop: rename_func("VAR_POP"), 630 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 631 exp.Xor: bool_xor_sql, 632 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 633 rename_func("LEVENSHTEIN") 634 ), 635 } 636 637 SUPPORTED_JSON_PATH_PARTS = { 638 exp.JSONPathKey, 639 exp.JSONPathRoot, 640 exp.JSONPathSubscript, 641 exp.JSONPathWildcard, 642 } 643 644 TYPE_MAPPING = { 645 **generator.Generator.TYPE_MAPPING, 646 exp.DataType.Type.BINARY: "BLOB", 647 exp.DataType.Type.BPCHAR: "TEXT", 648 exp.DataType.Type.CHAR: "TEXT", 649 exp.DataType.Type.FLOAT: "REAL", 650 exp.DataType.Type.NCHAR: "TEXT", 651 exp.DataType.Type.NVARCHAR: "TEXT", 652 exp.DataType.Type.UINT: "UINTEGER", 653 exp.DataType.Type.VARBINARY: "BLOB", 654 exp.DataType.Type.ROWVERSION: "BLOB", 655 exp.DataType.Type.VARCHAR: "TEXT", 656 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 657 
exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 658 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 659 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 660 } 661 662 # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 663 RESERVED_KEYWORDS = { 664 "array", 665 "analyse", 666 "union", 667 "all", 668 "when", 669 "in_p", 670 "default", 671 "create_p", 672 "window", 673 "asymmetric", 674 "to", 675 "else", 676 "localtime", 677 "from", 678 "end_p", 679 "select", 680 "current_date", 681 "foreign", 682 "with", 683 "grant", 684 "session_user", 685 "or", 686 "except", 687 "references", 688 "fetch", 689 "limit", 690 "group_p", 691 "leading", 692 "into", 693 "collate", 694 "offset", 695 "do", 696 "then", 697 "localtimestamp", 698 "check_p", 699 "lateral_p", 700 "current_role", 701 "where", 702 "asc_p", 703 "placing", 704 "desc_p", 705 "user", 706 "unique", 707 "initially", 708 "column", 709 "both", 710 "some", 711 "as", 712 "any", 713 "only", 714 "deferrable", 715 "null_p", 716 "current_time", 717 "true_p", 718 "table", 719 "case", 720 "trailing", 721 "variadic", 722 "for", 723 "on", 724 "distinct", 725 "false_p", 726 "not", 727 "constraint", 728 "current_timestamp", 729 "returning", 730 "primary", 731 "intersect", 732 "having", 733 "analyze", 734 "current_user", 735 "and", 736 "cast", 737 "symmetric", 738 "using", 739 "order", 740 "current_catalog", 741 } 742 743 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 744 745 # DuckDB doesn't generally support CREATE TABLE .. properties 746 # https://duckdb.org/docs/sql/statements/create_table.html 747 PROPERTIES_LOCATION = { 748 prop: exp.Properties.Location.UNSUPPORTED 749 for prop in generator.Generator.PROPERTIES_LOCATION 750 } 751 752 # There are a few exceptions (e.g. 
temporary tables) which are supported or 753 # can be transpiled to DuckDB, so we explicitly override them accordingly 754 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 755 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 756 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 757 758 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 759 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 760 761 def strtotime_sql(self, expression: exp.StrToTime) -> str: 762 if expression.args.get("safe"): 763 formatted_time = self.format_time(expression) 764 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 765 return str_to_time_sql(self, expression) 766 767 def strtodate_sql(self, expression: exp.StrToDate) -> str: 768 if expression.args.get("safe"): 769 formatted_time = self.format_time(expression) 770 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 771 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 772 773 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 774 arg = expression.this 775 if expression.args.get("safe"): 776 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 777 return self.func("JSON", arg) 778 779 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 780 nano = expression.args.get("nano") 781 if nano is not None: 782 expression.set( 783 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 784 ) 785 786 return rename_func("MAKE_TIME")(self, expression) 787 788 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 789 sec = expression.args["sec"] 790 791 milli = expression.args.get("milli") 792 if milli is not None: 793 sec += milli.pop() / exp.Literal.number(1000.0) 794 795 nano = expression.args.get("nano") 796 if nano is not None: 797 sec += nano.pop() / exp.Literal.number(1000000000.0) 798 799 if milli or nano: 800 expression.set("sec", sec) 801 802 return rename_func("MAKE_TIMESTAMP")(self, expression) 803 804 def tablesample_sql( 805 self, 806 expression: exp.TableSample, 807 tablesample_keyword: t.Optional[str] = None, 808 ) -> str: 809 if not isinstance(expression.parent, exp.Select): 810 # This sample clause only applies to a single source, not the entire resulting relation 811 tablesample_keyword = "TABLESAMPLE" 812 813 if expression.args.get("size"): 814 method = expression.args.get("method") 815 if method and method.name.upper() != "RESERVOIR": 816 self.unsupported( 817 f"Sampling method {method} is not supported with a discrete sample count, " 818 "defaulting to reservoir sampling" 819 ) 820 expression.set("method", exp.var("RESERVOIR")) 821 822 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 823 824 def interval_sql(self, expression: exp.Interval) -> str: 825 multiplier: t.Optional[int] = None 826 unit = expression.text("unit").lower() 827 828 if unit.startswith("week"): 829 multiplier = 7 830 if unit.startswith("quarter"): 831 multiplier = 90 832 833 if multiplier: 834 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 835 836 return super().interval_sql(expression) 837 838 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 839 if isinstance(expression.parent, exp.UserDefinedFunction): 840 return self.sql(expression, "this") 841 
return super().columndef_sql(expression, sep) 842 843 def join_sql(self, expression: exp.Join) -> str: 844 if ( 845 expression.side == "LEFT" 846 and not expression.args.get("on") 847 and isinstance(expression.this, exp.Unnest) 848 ): 849 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 850 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 851 return super().join_sql(expression.on(exp.true())) 852 853 return super().join_sql(expression) 854 855 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 856 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 857 if expression.args.get("is_end_exclusive"): 858 return rename_func("RANGE")(self, expression) 859 860 return self.function_fallback_sql(expression) 861 862 def bracket_sql(self, expression: exp.Bracket) -> str: 863 this = expression.this 864 if isinstance(this, exp.Array): 865 this.replace(exp.paren(this)) 866 867 bracket = super().bracket_sql(expression) 868 869 if not expression.args.get("returns_list_for_maps"): 870 if not this.type: 871 from sqlglot.optimizer.annotate_types import annotate_types 872 873 this = annotate_types(this) 874 875 if this.is_type(exp.DataType.Type.MAP): 876 bracket = f"({bracket})[1]" 877 878 return bracket 879 880 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 881 expression_sql = self.sql(expression, "expression") 882 883 func = expression.this 884 if isinstance(func, exp.PERCENTILES): 885 # Make the order key the first arg and slide the fraction to the right 886 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 887 order_col = expression.find(exp.Ordered) 888 if order_col: 889 func.set("expression", func.this) 890 func.set("this", order_col.this) 891 892 this = self.sql(expression, "this").rstrip(")") 893 894 return f"{this}{expression_sql})" 895 896 def length_sql(self, expression: exp.Length) -> str: 897 arg = expression.this 898 899 # Dialects like BQ and Snowflake also accept binary values as args, so 900 # DDB will attempt to infer the type or resort to case/when resolution 901 if not expression.args.get("binary") or arg.is_string: 902 return self.func("LENGTH", arg) 903 904 if not arg.type: 905 from sqlglot.optimizer.annotate_types import annotate_types 906 907 arg = annotate_types(arg) 908 909 if arg.is_type(*exp.DataType.TEXT_TYPES): 910 return self.func("LENGTH", arg) 911 912 # We need these casts to make duckdb's static type checker happy 913 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 914 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 915 916 case = ( 917 exp.case(self.func("TYPEOF", arg)) 918 .when( 919 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 920 ) # anonymous to break length_sql recursion 921 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 922 ) 923 924 return self.sql(case) 925 926 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 927 this = expression.this 928 key = expression.args.get("key") 929 key_sql = key.name if isinstance(key, exp.Expression) else "" 930 value_sql = self.sql(expression, "value") 931 932 kv_sql = f"{key_sql} := {value_sql}" 933 934 # If the input struct is empty e.g. 
transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 935 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 936 if isinstance(this, exp.Struct) and not this.expressions: 937 return self.func("STRUCT_PACK", kv_sql) 938 939 return self.func("STRUCT_INSERT", this, kv_sql) 940 941 def unnest_sql(self, expression: exp.Unnest) -> str: 942 explode_array = expression.args.get("explode_array") 943 if explode_array: 944 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 945 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 946 expression.expressions.append( 947 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 948 ) 949 950 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 951 alias = expression.args.get("alias") 952 if alias: 953 expression.set("alias", None) 954 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 955 956 unnest_sql = super().unnest_sql(expression) 957 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 958 return self.sql(select) 959 960 return super().unnest_sql(expression) 961 962 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 963 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 964 # DuckDB should render IGNORE NULLS only for the general-purpose 965 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 966 return super().ignorenulls_sql(expression) 967 968 return self.sql(expression, "this") 969 970 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 971 this = self.sql(expression, "this") 972 null_text = self.sql(expression, "null") 973 974 if null_text: 975 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 976 977 return self.func("ARRAY_TO_STRING", this, expression.expression) 978 979 @unsupported_args("position", "occurrence") 980 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 981 group = expression.args.get("group") 982 params = expression.args.get("parameters") 983 984 # Do not render group if there is no following argument, 985 # and it's the default value for this dialect 986 if ( 987 not params 988 and group 989 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 990 ): 991 group = None 992 return self.func( 993 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 994 )
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
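To tie the TRANSFORMS table above to concrete output, a hedged example (function and column names are illustrative; exact output may vary by sqlglot version):

    import sqlglot

    # exp.Rand renders as RANDOM per the TRANSFORMS mapping
    print(sqlglot.transpile("SELECT RAND()", read="bigquery", write="duckdb")[0])
    # likely: SELECT RANDOM()

    # '~' parses as exp.RegexpLike (see Parser.BITWISE), rendered as REGEXP_MATCHES
    print(sqlglot.transpile("SELECT s ~ 'a+'", read="duckdb", write="duckdb")[0])
    # likely: SELECT REGEXP_MATCHES(s, 'a+')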
767 def strtodate_sql(self, expression: exp.StrToDate) -> str: 768 if expression.args.get("safe"): 769 formatted_time = self.format_time(expression) 770 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 771 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
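Hedged usage sketch: the safe branch is exercised e.g. when transpiling BigQuery's SAFE.PARSE_DATE (the column name is illustrative):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT SAFE.PARSE_DATE('%Y-%m-%d', s)", read="bigquery", write="duckdb"
    )[0])
    # expected shape: SELECT CAST(TRY_STRPTIME(s, '%Y-%m-%d') AS DATE)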
779 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 780 nano = expression.args.get("nano") 781 if nano is not None: 782 expression.set( 783 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 784 ) 785 786 return rename_func("MAKE_TIME")(self, expression)
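Hedged example using Snowflake's four-argument TIME_FROM_PARTS, whose nanosecond component is folded into the seconds argument as shown above:

    import sqlglot

    print(sqlglot.transpile(
        "SELECT TIME_FROM_PARTS(12, 34, 56, 987654321)",
        read="snowflake", write="duckdb",
    )[0])
    # expected shape: SELECT MAKE_TIME(12, 34, 56 + 987654321 / 1000000000.0)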
788 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 789 sec = expression.args["sec"] 790 791 milli = expression.args.get("milli") 792 if milli is not None: 793 sec += milli.pop() / exp.Literal.number(1000.0) 794 795 nano = expression.args.get("nano") 796 if nano is not None: 797 sec += nano.pop() / exp.Literal.number(1000000000.0) 798 799 if milli or nano: 800 expression.set("sec", sec) 801 802 return rename_func("MAKE_TIMESTAMP")(self, expression)
804 def tablesample_sql( 805 self, 806 expression: exp.TableSample, 807 tablesample_keyword: t.Optional[str] = None, 808 ) -> str: 809 if not isinstance(expression.parent, exp.Select): 810 # This sample clause only applies to a single source, not the entire resulting relation 811 tablesample_keyword = "TABLESAMPLE" 812 813 if expression.args.get("size"): 814 method = expression.args.get("method") 815 if method and method.name.upper() != "RESERVOIR": 816 self.unsupported( 817 f"Sampling method {method} is not supported with a discrete sample count, " 818 "defaulting to reservoir sampling" 819 ) 820 expression.set("method", exp.var("RESERVOIR")) 821 822 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
824 def interval_sql(self, expression: exp.Interval) -> str: 825 multiplier: t.Optional[int] = None 826 unit = expression.text("unit").lower() 827 828 if unit.startswith("week"): 829 multiplier = 7 830 if unit.startswith("quarter"): 831 multiplier = 90 832 833 if multiplier: 834 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 835 836 return super().interval_sql(expression)
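Per interval_sql above, WEEK and QUARTER intervals are rewritten as day multiples; a hedged example:

    import sqlglot

    print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", write="duckdb")[0])
    # expected shape: SELECT (7 * INTERVAL '2' DAY)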
843 def join_sql(self, expression: exp.Join) -> str: 844 if ( 845 expression.side == "LEFT" 846 and not expression.args.get("on") 847 and isinstance(expression.this, exp.Unnest) 848 ): 849 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 850 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 851 return super().join_sql(expression.on(exp.true())) 852 853 return super().join_sql(expression)
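A hedged sketch of the ON TRUE injection (table and column names are illustrative):

    import sqlglot

    print(sqlglot.transpile("SELECT * FROM t LEFT JOIN UNNEST(a)", write="duckdb")[0])
    # expected shape: SELECT * FROM t LEFT JOIN UNNEST(a) ON TRUE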
862 def bracket_sql(self, expression: exp.Bracket) -> str: 863 this = expression.this 864 if isinstance(this, exp.Array): 865 this.replace(exp.paren(this)) 866 867 bracket = super().bracket_sql(expression) 868 869 if not expression.args.get("returns_list_for_maps"): 870 if not this.type: 871 from sqlglot.optimizer.annotate_types import annotate_types 872 873 this = annotate_types(this) 874 875 if this.is_type(exp.DataType.Type.MAP): 876 bracket = f"({bracket})[1]" 877 878 return bracket
880 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 881 expression_sql = self.sql(expression, "expression") 882 883 func = expression.this 884 if isinstance(func, exp.PERCENTILES): 885 # Make the order key the first arg and slide the fraction to the right 886 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 887 order_col = expression.find(exp.Ordered) 888 if order_col: 889 func.set("expression", func.this) 890 func.set("this", order_col.this) 891 892 this = self.sql(expression, "this").rstrip(")") 893 894 return f"{this}{expression_sql})"
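Hedged example of the argument reordering, reading from a dialect with ordered-set aggregates such as Postgres (names illustrative):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY x) FROM t",
        read="postgres", write="duckdb",
    )[0])
    # expected shape: SELECT QUANTILE_CONT(x, 0.5) FROM t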
896 def length_sql(self, expression: exp.Length) -> str: 897 arg = expression.this 898 899 # Dialects like BQ and Snowflake also accept binary values as args, so 900 # DDB will attempt to infer the type or resort to case/when resolution 901 if not expression.args.get("binary") or arg.is_string: 902 return self.func("LENGTH", arg) 903 904 if not arg.type: 905 from sqlglot.optimizer.annotate_types import annotate_types 906 907 arg = annotate_types(arg) 908 909 if arg.is_type(*exp.DataType.TEXT_TYPES): 910 return self.func("LENGTH", arg) 911 912 # We need these casts to make duckdb's static type checker happy 913 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 914 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 915 916 case = ( 917 exp.case(self.func("TYPEOF", arg)) 918 .when( 919 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 920 ) # anonymous to break length_sql recursion 921 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 922 ) 923 924 return self.sql(case)
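A hedged sketch, assuming the source dialect marks LENGTH as accepting binary args (the "binary" flag referenced above); the column name is illustrative:

    import sqlglot

    # Known string literal: plain LENGTH suffices
    print(sqlglot.transpile("SELECT LENGTH('abc')", read="bigquery", write="duckdb")[0])

    # Untyped column: falls back to the CASE TYPEOF(...) WHEN 'VARCHAR' ... WHEN 'BLOB' ... form
    print(sqlglot.transpile("SELECT LENGTH(c) FROM t", read="bigquery", write="duckdb")[0])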
926 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 927 this = expression.this 928 key = expression.args.get("key") 929 key_sql = key.name if isinstance(key, exp.Expression) else "" 930 value_sql = self.sql(expression, "value") 931 932 kv_sql = f"{key_sql} := {value_sql}" 933 934 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 935 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 936 if isinstance(this, exp.Struct) and not this.expressions: 937 return self.func("STRUCT_PACK", kv_sql) 938 939 return self.func("STRUCT_INSERT", this, kv_sql)
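Hedged example of the empty-struct special case when transpiling from Snowflake:

    import sqlglot

    print(sqlglot.transpile(
        "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 1)",
        read="snowflake", write="duckdb",
    )[0])
    # expected shape: SELECT STRUCT_PACK(k := 1)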
941 def unnest_sql(self, expression: exp.Unnest) -> str: 942 explode_array = expression.args.get("explode_array") 943 if explode_array: 944 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 945 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 946 expression.expressions.append( 947 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 948 ) 949 950 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 951 alias = expression.args.get("alias") 952 if alias: 953 expression.set("alias", None) 954 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 955 956 unnest_sql = super().unnest_sql(expression) 957 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 958 return self.sql(select) 959 960 return super().unnest_sql(expression)
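A hedged sketch that builds the Unnest node directly with the explode_array flag set; direct construction is an assumption for illustration only, since in practice the flag is populated when parsing BigQuery:

    import sqlglot
    from sqlglot import exp

    # explode_array=True triggers the max_depth => 2 rewrite shown above
    unnest = exp.Unnest(expressions=[exp.column("arr")], explode_array=True)
    print(exp.select("*").from_(unnest).sql(dialect="duckdb"))
    # expected shape: SELECT * FROM (SELECT UNNEST(arr, max_depth => 2))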
962 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 963 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 964 # DuckDB should render IGNORE NULLS only for the general-purpose 965 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 966 return super().ignorenulls_sql(expression) 967 968 return self.sql(expression, "this")
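Hedged example: FIRST_VALUE is in WINDOW_FUNCS_WITH_IGNORE_NULLS, so the clause survives generation (names illustrative):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t",
        read="bigquery", write="duckdb",
    )[0])
    # expected shape: SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t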
970 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 971 this = self.sql(expression, "this") 972 null_text = self.sql(expression, "null") 973 974 if null_text: 975 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 976 977 return self.func("ARRAY_TO_STRING", this, expression.expression)
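Hedged example of the NULL-replacement emulation, using BigQuery's three-argument ARRAY_TO_STRING (names illustrative):

    import sqlglot

    print(sqlglot.transpile(
        "SELECT ARRAY_TO_STRING(a, ',', '?')", read="bigquery", write="duckdb"
    )[0])
    # expected shape: SELECT ARRAY_TO_STRING(LIST_TRANSFORM(a, x -> COALESCE(x, '?')), ',')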
979 @unsupported_args("position", "occurrence") 980 def regexpextract_sql(self, expression: exp.RegexpExtract) -> str: 981 group = expression.args.get("group") 982 params = expression.args.get("parameters") 983 984 # Do not render group if there is no following argument, 985 # and it's the default value for this dialect 986 if ( 987 not params 988 and group 989 and group.name == str(self.dialect.REGEXP_EXTRACT_DEFAULT_GROUP) 990 ): 991 group = None 992 return self.func( 993 "REGEXP_EXTRACT", expression.this, expression.expression, group, params 994 )
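Hedged round-trip sketch of the default-group elision, assuming group 0 is DuckDB's REGEXP_EXTRACT_DEFAULT_GROUP:

    import sqlglot

    # The default group (0) is filled in at parse time but omitted on output
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)')", read="duckdb", write="duckdb")[0])

    # An explicit non-default group is preserved
    print(sqlglot.transpile("SELECT REGEXP_EXTRACT(s, 'a(b)', 1)", read="duckdb", write="duckdb")[0])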
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql