sqlglot.dialects.clickhouse
1from __future__ import annotations 2import typing as t 3import datetime 4from sqlglot import exp, generator, parser, tokens 5from sqlglot.dialects.dialect import ( 6 Dialect, 7 NormalizationStrategy, 8 arg_max_or_min_no_count, 9 build_date_delta, 10 build_formatted_time, 11 inline_array_sql, 12 json_extract_segments, 13 json_path_key_only_name, 14 no_pivot_sql, 15 build_json_extract_path, 16 rename_func, 17 sha256_sql, 18 var_map_sql, 19 timestamptrunc_sql, 20 unit_to_var, 21 trim_sql, 22) 23from sqlglot.generator import Generator 24from sqlglot.helper import is_int, seq_get 25from sqlglot.tokens import Token, TokenType 26from sqlglot.generator import unsupported_args 27 28DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 29 30 31def _build_date_format(args: t.List) -> exp.TimeToStr: 32 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 33 34 timezone = seq_get(args, 2) 35 if timezone: 36 expr.set("zone", timezone) 37 38 return expr 39 40 41def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 42 scale = expression.args.get("scale") 43 timestamp = expression.this 44 45 if scale in (None, exp.UnixToTime.SECONDS): 46 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 47 if scale == exp.UnixToTime.MILLIS: 48 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 49 if scale == exp.UnixToTime.MICROS: 50 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 51 if scale == exp.UnixToTime.NANOS: 52 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 53 54 return self.func( 55 "fromUnixTimestamp", 56 exp.cast( 57 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 58 ), 59 ) 60 61 62def _lower_func(sql: str) -> str: 63 index = sql.index("(") 64 return sql[:index].lower() + sql[index:] 65 66 67def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 68 quantile = expression.args["quantile"] 69 args = f"({self.sql(expression, 'this')})" 70 71 if isinstance(quantile, exp.Array): 72 func = self.func("quantiles", *quantile) 73 else: 74 func = self.func("quantile", quantile) 75 76 return func + args 77 78 79def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 80 if len(args) == 1: 81 return exp.CountIf(this=seq_get(args, 0)) 82 83 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 84 85 86def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 87 if len(args) == 3: 88 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 89 90 strtodate = exp.StrToDate.from_arg_list(args) 91 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 92 93 94def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 95 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 96 if not expression.unit: 97 return rename_func(name)(self, expression) 98 99 return self.func( 100 name, 101 unit_to_var(expression), 102 expression.expression, 103 expression.this, 104 ) 105 106 return _delta_sql 107 108 109def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 110 ts = expression.this 111 112 tz = expression.args.get("zone") 113 if tz and isinstance(ts, exp.Literal): 114 # Clickhouse will not accept timestamps that include a UTC offset, so we must remove them. 115 # The first step to removing is parsing the string with `datetime.datetime.fromisoformat`. 116 # 117 # In python <3.11, `fromisoformat()` can only parse timestamps of millisecond (3 digit) 118 # or microsecond (6 digit) precision. It will error if passed any other number of fractional 119 # digits, so we extract the fractional seconds and pad to 6 digits before parsing. 120 ts_string = ts.name.strip() 121 122 # separate [date and time] from [fractional seconds and UTC offset] 123 ts_parts = ts_string.split(".") 124 if len(ts_parts) == 2: 125 # separate fractional seconds and UTC offset 126 offset_sep = "+" if "+" in ts_parts[1] else "-" 127 ts_frac_parts = ts_parts[1].split(offset_sep) 128 num_frac_parts = len(ts_frac_parts) 129 130 # pad to 6 digits if fractional seconds present 131 ts_frac_parts[0] = ts_frac_parts[0].ljust(6, "0") 132 ts_string = "".join( 133 [ 134 ts_parts[0], # date and time 135 ".", 136 ts_frac_parts[0], # fractional seconds 137 offset_sep if num_frac_parts > 1 else "", 138 ts_frac_parts[1] if num_frac_parts > 1 else "", # utc offset (if present) 139 ] 140 ) 141 142 # return literal with no timezone, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 143 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if 144 # it's part of the timestamp string 145 ts_without_tz = ( 146 datetime.datetime.fromisoformat(ts_string).replace(tzinfo=None).isoformat(sep=" ") 147 ) 148 ts = exp.Literal.string(ts_without_tz) 149 150 # Non-nullable DateTime64 with microsecond precision 151 expressions = [exp.DataTypeParam(this=tz)] if tz else [] 152 datatype = exp.DataType.build( 153 exp.DataType.Type.DATETIME64, 154 expressions=[exp.DataTypeParam(this=exp.Literal.number(6)), *expressions], 155 nullable=False, 156 ) 157 158 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 159 160 161class ClickHouse(Dialect): 162 NORMALIZE_FUNCTIONS: bool | str = False 163 NULL_ORDERING = "nulls_are_last" 164 SUPPORTS_USER_DEFINED_TYPES = False 165 SAFE_DIVISION = True 166 LOG_BASE_FIRST: t.Optional[bool] = None 167 FORCE_EARLY_ALIAS_REF_EXPANSION = True 168 PRESERVE_ORIGINAL_NAMES = True 169 170 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 171 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 172 173 UNESCAPED_SEQUENCES = { 174 "\\0": "\0", 175 } 176 177 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 178 179 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 180 exp.Except: False, 181 exp.Intersect: False, 182 exp.Union: None, 183 } 184 185 class Tokenizer(tokens.Tokenizer): 186 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 187 IDENTIFIERS = ['"', "`"] 188 IDENTIFIER_ESCAPES = ["\\"] 189 STRING_ESCAPES = ["'", "\\"] 190 BIT_STRINGS = [("0b", "")] 191 HEX_STRINGS = [("0x", ""), ("0X", "")] 192 HEREDOC_STRINGS = ["$"] 193 194 KEYWORDS = { 195 **tokens.Tokenizer.KEYWORDS, 196 "ATTACH": TokenType.COMMAND, 197 "DATE32": TokenType.DATE32, 198 "DATETIME64": TokenType.DATETIME64, 199 "DICTIONARY": TokenType.DICTIONARY, 200 "ENUM8": TokenType.ENUM8, 201 "ENUM16": TokenType.ENUM16, 202 "FINAL": TokenType.FINAL, 203 "FIXEDSTRING": TokenType.FIXEDSTRING, 204 "FLOAT32": TokenType.FLOAT, 205 "FLOAT64": TokenType.DOUBLE, 206 "GLOBAL": TokenType.GLOBAL, 207 "INT256": TokenType.INT256, 208 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 209 "MAP": TokenType.MAP, 210 "NESTED": TokenType.NESTED, 211 "SAMPLE": TokenType.TABLE_SAMPLE, 212 "TUPLE": TokenType.STRUCT, 213 "UINT128": TokenType.UINT128, 214 "UINT16": TokenType.USMALLINT, 215 "UINT256": TokenType.UINT256, 216 "UINT32": TokenType.UINT, 217 "UINT64": TokenType.UBIGINT, 218 "UINT8": TokenType.UTINYINT, 219 "IPV4": TokenType.IPV4, 220 "IPV6": TokenType.IPV6, 221 "POINT": TokenType.POINT, 222 "RING": TokenType.RING, 223 "LINESTRING": TokenType.LINESTRING, 224 "MULTILINESTRING": TokenType.MULTILINESTRING, 225 "POLYGON": TokenType.POLYGON, 226 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 227 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 228 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 229 "SYSTEM": TokenType.COMMAND, 230 "PREWHERE": TokenType.PREWHERE, 231 } 232 KEYWORDS.pop("/*+") 233 234 SINGLE_TOKENS = { 235 **tokens.Tokenizer.SINGLE_TOKENS, 236 "$": TokenType.HEREDOC_STRING, 237 } 238 239 class Parser(parser.Parser): 240 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 241 # * select x from t1 union all select x from t2 limit 1; 242 # * select x from t1 union all (select x from t2 limit 1); 243 MODIFIERS_ATTACHED_TO_SET_OP = False 244 INTERVAL_SPANS = False 245 246 FUNCTIONS = { 247 **parser.Parser.FUNCTIONS, 248 "ANY": exp.AnyValue.from_arg_list, 249 "ARRAYSUM": exp.ArraySum.from_arg_list, 250 "COUNTIF": _build_count_if, 251 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 252 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 254 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATE_FORMAT": _build_date_format, 256 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 257 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 258 "FORMATDATETIME": _build_date_format, 259 "JSONEXTRACTSTRING": build_json_extract_path( 260 exp.JSONExtractScalar, zero_based_indexing=False 261 ), 262 "MAP": parser.build_var_map, 263 "MATCH": exp.RegexpLike.from_arg_list, 264 "RANDCANONICAL": exp.Rand.from_arg_list, 265 "STR_TO_DATE": _build_str_to_date, 266 "TUPLE": exp.Struct.from_arg_list, 267 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 268 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 270 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "UNIQ": exp.ApproxDistinct.from_arg_list, 272 "XOR": lambda args: exp.Xor(expressions=args), 273 "MD5": exp.MD5Digest.from_arg_list, 274 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 275 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 276 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 277 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 278 } 279 FUNCTIONS.pop("TRANSFORM") 280 281 AGG_FUNCTIONS = { 282 "count", 283 "min", 284 "max", 285 "sum", 286 "avg", 287 "any", 288 "stddevPop", 289 "stddevSamp", 290 "varPop", 291 "varSamp", 292 "corr", 293 "covarPop", 294 "covarSamp", 295 "entropy", 296 "exponentialMovingAverage", 297 "intervalLengthSum", 298 "kolmogorovSmirnovTest", 299 "mannWhitneyUTest", 300 "median", 301 "rankCorr", 302 "sumKahan", 303 "studentTTest", 304 "welchTTest", 305 "anyHeavy", 306 "anyLast", 307 "boundingRatio", 308 "first_value", 309 "last_value", 310 "argMin", 311 "argMax", 312 "avgWeighted", 313 "topK", 314 "topKWeighted", 315 "deltaSum", 316 "deltaSumTimestamp", 317 "groupArray", 318 "groupArrayLast", 319 "groupUniqArray", 320 "groupArrayInsertAt", 321 "groupArrayMovingAvg", 322 "groupArrayMovingSum", 323 "groupArraySample", 324 "groupBitAnd", 325 "groupBitOr", 326 "groupBitXor", 327 "groupBitmap", 328 "groupBitmapAnd", 329 "groupBitmapOr", 330 "groupBitmapXor", 331 "sumWithOverflow", 332 "sumMap", 333 "minMap", 334 "maxMap", 335 "skewSamp", 336 "skewPop", 337 "kurtSamp", 338 "kurtPop", 339 "uniq", 340 "uniqExact", 341 "uniqCombined", 342 "uniqCombined64", 343 "uniqHLL12", 344 "uniqTheta", 345 "quantile", 346 "quantiles", 347 "quantileExact", 348 "quantilesExact", 349 "quantileExactLow", 350 "quantilesExactLow", 351 "quantileExactHigh", 352 "quantilesExactHigh", 353 "quantileExactWeighted", 354 "quantilesExactWeighted", 355 "quantileTiming", 356 "quantilesTiming", 357 "quantileTimingWeighted", 358 "quantilesTimingWeighted", 359 "quantileDeterministic", 360 "quantilesDeterministic", 361 "quantileTDigest", 362 "quantilesTDigest", 363 "quantileTDigestWeighted", 364 "quantilesTDigestWeighted", 365 "quantileBFloat16", 366 "quantilesBFloat16", 367 "quantileBFloat16Weighted", 368 "quantilesBFloat16Weighted", 369 "simpleLinearRegression", 370 "stochasticLinearRegression", 371 "stochasticLogisticRegression", 372 "categoricalInformationValue", 373 "contingency", 374 "cramersV", 375 "cramersVBiasCorrected", 376 "theilsU", 377 "maxIntersections", 378 "maxIntersectionsPosition", 379 "meanZTest", 380 "quantileInterpolatedWeighted", 381 "quantilesInterpolatedWeighted", 382 "quantileGK", 383 "quantilesGK", 384 "sparkBar", 385 "sumCount", 386 "largestTriangleThreeBuckets", 387 "histogram", 388 "sequenceMatch", 389 "sequenceCount", 390 "windowFunnel", 391 "retention", 392 "uniqUpTo", 393 "sequenceNextNode", 394 "exponentialTimeDecayedAvg", 395 } 396 397 AGG_FUNCTIONS_SUFFIXES = [ 398 "If", 399 "Array", 400 "ArrayIf", 401 "Map", 402 "SimpleState", 403 "State", 404 "Merge", 405 "MergeState", 406 "ForEach", 407 "Distinct", 408 "OrDefault", 409 "OrNull", 410 "Resample", 411 "ArgMin", 412 "ArgMax", 413 ] 414 415 FUNC_TOKENS = { 416 *parser.Parser.FUNC_TOKENS, 417 TokenType.SET, 418 } 419 420 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 421 422 ID_VAR_TOKENS = { 423 *parser.Parser.ID_VAR_TOKENS, 424 TokenType.LIKE, 425 } 426 427 AGG_FUNC_MAPPING = ( 428 lambda functions, suffixes: { 429 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 430 } 431 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 432 433 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 434 435 FUNCTION_PARSERS = { 436 **parser.Parser.FUNCTION_PARSERS, 437 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 438 "QUANTILE": lambda self: self._parse_quantile(), 439 "MEDIAN": lambda self: self._parse_quantile(), 440 "COLUMNS": lambda self: self._parse_columns(), 441 } 442 443 FUNCTION_PARSERS.pop("MATCH") 444 445 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 446 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 447 448 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 449 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 450 451 RANGE_PARSERS = { 452 **parser.Parser.RANGE_PARSERS, 453 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 454 and self._parse_in(this, is_global=True), 455 } 456 457 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 458 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 459 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 460 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 461 462 JOIN_KINDS = { 463 *parser.Parser.JOIN_KINDS, 464 TokenType.ANY, 465 TokenType.ASOF, 466 TokenType.ARRAY, 467 } 468 469 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 470 TokenType.ANY, 471 TokenType.ARRAY, 472 TokenType.FINAL, 473 TokenType.FORMAT, 474 TokenType.SETTINGS, 475 } 476 477 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 478 TokenType.FORMAT, 479 } 480 481 LOG_DEFAULTS_TO_LN = True 482 483 QUERY_MODIFIER_PARSERS = { 484 **parser.Parser.QUERY_MODIFIER_PARSERS, 485 TokenType.SETTINGS: lambda self: ( 486 "settings", 487 self._advance() or self._parse_csv(self._parse_assignment), 488 ), 489 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 490 } 491 492 CONSTRAINT_PARSERS = { 493 **parser.Parser.CONSTRAINT_PARSERS, 494 "INDEX": lambda self: self._parse_index_constraint(), 495 "CODEC": lambda self: self._parse_compress(), 496 } 497 498 ALTER_PARSERS = { 499 **parser.Parser.ALTER_PARSERS, 500 "REPLACE": lambda self: self._parse_alter_table_replace(), 501 } 502 503 SCHEMA_UNNAMED_CONSTRAINTS = { 504 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 505 "INDEX", 506 } 507 508 PLACEHOLDER_PARSERS = { 509 **parser.Parser.PLACEHOLDER_PARSERS, 510 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 511 } 512 513 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 514 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 515 return self._parse_lambda() 516 517 def _parse_types( 518 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 519 ) -> t.Optional[exp.Expression]: 520 dtype = super()._parse_types( 521 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 522 ) 523 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 524 # Mark every type as non-nullable which is ClickHouse's default, unless it's 525 # already marked as nullable. This marker helps us transpile types from other 526 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 527 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 528 # fail in ClickHouse without the `Nullable` type constructor. 529 dtype.set("nullable", False) 530 531 return dtype 532 533 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 534 index = self._index 535 this = self._parse_bitwise() 536 if self._match(TokenType.FROM): 537 self._retreat(index) 538 return super()._parse_extract() 539 540 # We return Anonymous here because extract and regexpExtract have different semantics, 541 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 542 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 543 # 544 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 545 self._match(TokenType.COMMA) 546 return self.expression( 547 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 548 ) 549 550 def _parse_assignment(self) -> t.Optional[exp.Expression]: 551 this = super()._parse_assignment() 552 553 if self._match(TokenType.PLACEHOLDER): 554 return self.expression( 555 exp.If, 556 this=this, 557 true=self._parse_assignment(), 558 false=self._match(TokenType.COLON) and self._parse_assignment(), 559 ) 560 561 return this 562 563 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 564 """ 565 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 566 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 567 """ 568 index = self._index 569 570 this = self._parse_id_var() 571 self._match(TokenType.COLON) 572 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 573 self._match_text_seq("IDENTIFIER") and "Identifier" 574 ) 575 576 if not kind: 577 self._retreat(index) 578 return None 579 elif not self._match(TokenType.R_BRACE): 580 self.raise_error("Expecting }") 581 582 return self.expression(exp.Placeholder, this=this, kind=kind) 583 584 def _parse_bracket( 585 self, this: t.Optional[exp.Expression] = None 586 ) -> t.Optional[exp.Expression]: 587 l_brace = self._match(TokenType.L_BRACE, advance=False) 588 bracket = super()._parse_bracket(this) 589 590 if l_brace and isinstance(bracket, exp.Struct): 591 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 592 for expression in bracket.expressions: 593 if not isinstance(expression, exp.PropertyEQ): 594 break 595 596 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 597 varmap.args["values"].append("expressions", expression.expression) 598 599 return varmap 600 601 return bracket 602 603 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 604 this = super()._parse_in(this) 605 this.set("is_global", is_global) 606 return this 607 608 def _parse_table( 609 self, 610 schema: bool = False, 611 joins: bool = False, 612 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 613 parse_bracket: bool = False, 614 is_db_reference: bool = False, 615 parse_partition: bool = False, 616 ) -> t.Optional[exp.Expression]: 617 this = super()._parse_table( 618 schema=schema, 619 joins=joins, 620 alias_tokens=alias_tokens, 621 parse_bracket=parse_bracket, 622 is_db_reference=is_db_reference, 623 ) 624 625 if self._match(TokenType.FINAL): 626 this = self.expression(exp.Final, this=this) 627 628 return this 629 630 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 631 return super()._parse_position(haystack_first=True) 632 633 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 634 def _parse_cte(self) -> exp.CTE: 635 # WITH <identifier> AS <subquery expression> 636 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 637 638 if not cte: 639 # WITH <expression> AS <identifier> 640 cte = self.expression( 641 exp.CTE, 642 this=self._parse_assignment(), 643 alias=self._parse_table_alias(), 644 scalar=True, 645 ) 646 647 return cte 648 649 def _parse_join_parts( 650 self, 651 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 652 is_global = self._match(TokenType.GLOBAL) and self._prev 653 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 654 655 if kind_pre: 656 kind = self._match_set(self.JOIN_KINDS) and self._prev 657 side = self._match_set(self.JOIN_SIDES) and self._prev 658 return is_global, side, kind 659 660 return ( 661 is_global, 662 self._match_set(self.JOIN_SIDES) and self._prev, 663 self._match_set(self.JOIN_KINDS) and self._prev, 664 ) 665 666 def _parse_join( 667 self, skip_join_token: bool = False, parse_bracket: bool = False 668 ) -> t.Optional[exp.Join]: 669 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 670 if join: 671 join.set("global", join.args.pop("method", None)) 672 673 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 674 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 675 if join.kind == "ARRAY": 676 for table in join.find_all(exp.Table): 677 table.replace(table.to_column()) 678 679 return join 680 681 def _parse_function( 682 self, 683 functions: t.Optional[t.Dict[str, t.Callable]] = None, 684 anonymous: bool = False, 685 optional_parens: bool = True, 686 any_token: bool = False, 687 ) -> t.Optional[exp.Expression]: 688 expr = super()._parse_function( 689 functions=functions, 690 anonymous=anonymous, 691 optional_parens=optional_parens, 692 any_token=any_token, 693 ) 694 695 func = expr.this if isinstance(expr, exp.Window) else expr 696 697 # Aggregate functions can be split in 2 parts: <func_name><suffix> 698 parts = ( 699 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 700 ) 701 702 if parts: 703 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 704 params = self._parse_func_params(anon_func) 705 706 kwargs = { 707 "this": anon_func.this, 708 "expressions": anon_func.expressions, 709 } 710 if parts[1]: 711 kwargs["parts"] = parts 712 exp_class: t.Type[exp.Expression] = ( 713 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 714 ) 715 else: 716 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 717 718 kwargs["exp_class"] = exp_class 719 if params: 720 kwargs["params"] = params 721 722 func = self.expression(**kwargs) 723 724 if isinstance(expr, exp.Window): 725 # The window's func was parsed as Anonymous in base parser, fix its 726 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 727 expr.set("this", func) 728 elif params: 729 # Params have blocked super()._parse_function() from parsing the following window 730 # (if that exists) as they're standing between the function call and the window spec 731 expr = self._parse_window(func) 732 else: 733 expr = func 734 735 return expr 736 737 def _parse_func_params( 738 self, this: t.Optional[exp.Func] = None 739 ) -> t.Optional[t.List[exp.Expression]]: 740 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 741 return self._parse_csv(self._parse_lambda) 742 743 if self._match(TokenType.L_PAREN): 744 params = self._parse_csv(self._parse_lambda) 745 self._match_r_paren(this) 746 return params 747 748 return None 749 750 def _parse_quantile(self) -> exp.Quantile: 751 this = self._parse_lambda() 752 params = self._parse_func_params() 753 if params: 754 return self.expression(exp.Quantile, this=params[0], quantile=this) 755 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 756 757 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 758 return super()._parse_wrapped_id_vars(optional=True) 759 760 def _parse_primary_key( 761 self, wrapped_optional: bool = False, in_props: bool = False 762 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 763 return super()._parse_primary_key( 764 wrapped_optional=wrapped_optional or in_props, in_props=in_props 765 ) 766 767 def _parse_on_property(self) -> t.Optional[exp.Expression]: 768 index = self._index 769 if self._match_text_seq("CLUSTER"): 770 this = self._parse_id_var() 771 if this: 772 return self.expression(exp.OnCluster, this=this) 773 else: 774 self._retreat(index) 775 return None 776 777 def _parse_index_constraint( 778 self, kind: t.Optional[str] = None 779 ) -> exp.IndexColumnConstraint: 780 # INDEX name1 expr TYPE type1(args) GRANULARITY value 781 this = self._parse_id_var() 782 expression = self._parse_assignment() 783 784 index_type = self._match_text_seq("TYPE") and ( 785 self._parse_function() or self._parse_var() 786 ) 787 788 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 789 790 return self.expression( 791 exp.IndexColumnConstraint, 792 this=this, 793 expression=expression, 794 index_type=index_type, 795 granularity=granularity, 796 ) 797 798 def _parse_partition(self) -> t.Optional[exp.Partition]: 799 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 800 if not self._match(TokenType.PARTITION): 801 return None 802 803 if self._match_text_seq("ID"): 804 # Corresponds to the PARTITION ID <string_value> syntax 805 expressions: t.List[exp.Expression] = [ 806 self.expression(exp.PartitionId, this=self._parse_string()) 807 ] 808 else: 809 expressions = self._parse_expressions() 810 811 return self.expression(exp.Partition, expressions=expressions) 812 813 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 814 partition = self._parse_partition() 815 816 if not partition or not self._match(TokenType.FROM): 817 return None 818 819 return self.expression( 820 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 821 ) 822 823 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 824 if not self._match_text_seq("PROJECTION"): 825 return None 826 827 return self.expression( 828 exp.ProjectionDef, 829 this=self._parse_id_var(), 830 expression=self._parse_wrapped(self._parse_statement), 831 ) 832 833 def _parse_constraint(self) -> t.Optional[exp.Expression]: 834 return super()._parse_constraint() or self._parse_projection_def() 835 836 def _parse_alias( 837 self, this: t.Optional[exp.Expression], explicit: bool = False 838 ) -> t.Optional[exp.Expression]: 839 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 840 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 841 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 842 return this 843 844 return super()._parse_alias(this=this, explicit=explicit) 845 846 def _parse_expression(self) -> t.Optional[exp.Expression]: 847 this = super()._parse_expression() 848 849 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 850 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 851 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 852 self._match(TokenType.R_PAREN) 853 854 return this 855 856 def _parse_columns(self) -> exp.Expression: 857 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 858 859 while self._next and self._match_text_seq(")", "APPLY", "("): 860 self._match(TokenType.R_PAREN) 861 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 862 return this 863 864 class Generator(generator.Generator): 865 QUERY_HINTS = False 866 STRUCT_DELIMITER = ("(", ")") 867 NVL2_SUPPORTED = False 868 TABLESAMPLE_REQUIRES_PARENS = False 869 TABLESAMPLE_SIZE_IS_ROWS = False 870 TABLESAMPLE_KEYWORDS = "SAMPLE" 871 LAST_DAY_SUPPORTS_DATE_PART = False 872 CAN_IMPLEMENT_ARRAY_ANY = True 873 SUPPORTS_TO_NUMBER = False 874 JOIN_HINTS = False 875 TABLE_HINTS = False 876 GROUPINGS_SEP = "" 877 SET_OP_MODIFIERS = False 878 SUPPORTS_TABLE_ALIAS_COLUMNS = False 879 VALUES_AS_TABLE = False 880 ARRAY_SIZE_NAME = "LENGTH" 881 882 STRING_TYPE_MAPPING = { 883 exp.DataType.Type.CHAR: "String", 884 exp.DataType.Type.LONGBLOB: "String", 885 exp.DataType.Type.LONGTEXT: "String", 886 exp.DataType.Type.MEDIUMBLOB: "String", 887 exp.DataType.Type.MEDIUMTEXT: "String", 888 exp.DataType.Type.TINYBLOB: "String", 889 exp.DataType.Type.TINYTEXT: "String", 890 exp.DataType.Type.TEXT: "String", 891 exp.DataType.Type.VARBINARY: "String", 892 exp.DataType.Type.VARCHAR: "String", 893 } 894 895 SUPPORTED_JSON_PATH_PARTS = { 896 exp.JSONPathKey, 897 exp.JSONPathRoot, 898 exp.JSONPathSubscript, 899 } 900 901 TYPE_MAPPING = { 902 **generator.Generator.TYPE_MAPPING, 903 **STRING_TYPE_MAPPING, 904 exp.DataType.Type.ARRAY: "Array", 905 exp.DataType.Type.BOOLEAN: "Bool", 906 exp.DataType.Type.BIGINT: "Int64", 907 exp.DataType.Type.DATE32: "Date32", 908 exp.DataType.Type.DATETIME: "DateTime", 909 exp.DataType.Type.DATETIME2: "DateTime", 910 exp.DataType.Type.SMALLDATETIME: "DateTime", 911 exp.DataType.Type.DATETIME64: "DateTime64", 912 exp.DataType.Type.DECIMAL: "Decimal", 913 exp.DataType.Type.DECIMAL32: "Decimal32", 914 exp.DataType.Type.DECIMAL64: "Decimal64", 915 exp.DataType.Type.DECIMAL128: "Decimal128", 916 exp.DataType.Type.DECIMAL256: "Decimal256", 917 exp.DataType.Type.TIMESTAMP: "DateTime", 918 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 919 exp.DataType.Type.DOUBLE: "Float64", 920 exp.DataType.Type.ENUM: "Enum", 921 exp.DataType.Type.ENUM8: "Enum8", 922 exp.DataType.Type.ENUM16: "Enum16", 923 exp.DataType.Type.FIXEDSTRING: "FixedString", 924 exp.DataType.Type.FLOAT: "Float32", 925 exp.DataType.Type.INT: "Int32", 926 exp.DataType.Type.MEDIUMINT: "Int32", 927 exp.DataType.Type.INT128: "Int128", 928 exp.DataType.Type.INT256: "Int256", 929 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 930 exp.DataType.Type.MAP: "Map", 931 exp.DataType.Type.NESTED: "Nested", 932 exp.DataType.Type.SMALLINT: "Int16", 933 exp.DataType.Type.STRUCT: "Tuple", 934 exp.DataType.Type.TINYINT: "Int8", 935 exp.DataType.Type.UBIGINT: "UInt64", 936 exp.DataType.Type.UINT: "UInt32", 937 exp.DataType.Type.UINT128: "UInt128", 938 exp.DataType.Type.UINT256: "UInt256", 939 exp.DataType.Type.USMALLINT: "UInt16", 940 exp.DataType.Type.UTINYINT: "UInt8", 941 exp.DataType.Type.IPV4: "IPv4", 942 exp.DataType.Type.IPV6: "IPv6", 943 exp.DataType.Type.POINT: "Point", 944 exp.DataType.Type.RING: "Ring", 945 exp.DataType.Type.LINESTRING: "LineString", 946 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 947 exp.DataType.Type.POLYGON: "Polygon", 948 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 949 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 950 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 951 } 952 953 TRANSFORMS = { 954 **generator.Generator.TRANSFORMS, 955 exp.AnyValue: rename_func("any"), 956 exp.ApproxDistinct: rename_func("uniq"), 957 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 958 exp.ArraySum: rename_func("arraySum"), 959 exp.ArgMax: arg_max_or_min_no_count("argMax"), 960 exp.ArgMin: arg_max_or_min_no_count("argMin"), 961 exp.Array: inline_array_sql, 962 exp.CastToStrType: rename_func("CAST"), 963 exp.CountIf: rename_func("countIf"), 964 exp.CompressColumnConstraint: lambda self, 965 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 966 exp.ComputedColumnConstraint: lambda self, 967 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 968 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 969 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 970 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 971 exp.DateStrToDate: rename_func("toDate"), 972 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 973 exp.Explode: rename_func("arrayJoin"), 974 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 975 exp.IsNan: rename_func("isNaN"), 976 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 977 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 978 exp.JSONPathKey: json_path_key_only_name, 979 exp.JSONPathRoot: lambda *_: "", 980 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 981 exp.Median: rename_func("median"), 982 exp.Nullif: rename_func("nullIf"), 983 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 984 exp.Pivot: no_pivot_sql, 985 exp.Quantile: _quantile_sql, 986 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 987 exp.Rand: rename_func("randCanonical"), 988 exp.StartsWith: rename_func("startsWith"), 989 exp.StrPosition: lambda self, e: self.func( 990 "position", e.this, e.args.get("substr"), e.args.get("position") 991 ), 992 exp.TimeToStr: lambda self, e: self.func( 993 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 994 ), 995 exp.TimeStrToTime: _timestrtotime_sql, 996 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 997 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 998 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 999 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1000 exp.MD5Digest: rename_func("MD5"), 1001 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1002 exp.SHA: rename_func("SHA1"), 1003 exp.SHA2: sha256_sql, 1004 exp.UnixToTime: _unix_to_time_sql, 1005 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1006 exp.Trim: trim_sql, 1007 exp.Variance: rename_func("varSamp"), 1008 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1009 exp.Stddev: rename_func("stddevSamp"), 1010 exp.Chr: rename_func("CHAR"), 1011 exp.Lag: lambda self, e: self.func( 1012 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1013 ), 1014 exp.Lead: lambda self, e: self.func( 1015 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1016 ), 1017 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1018 rename_func("editDistance") 1019 ), 1020 } 1021 1022 PROPERTIES_LOCATION = { 1023 **generator.Generator.PROPERTIES_LOCATION, 1024 exp.OnCluster: exp.Properties.Location.POST_NAME, 1025 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1026 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1027 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1028 } 1029 1030 # There's no list in docs, but it can be found in Clickhouse code 1031 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1032 ON_CLUSTER_TARGETS = { 1033 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1034 "DATABASE", 1035 "TABLE", 1036 "VIEW", 1037 "DICTIONARY", 1038 "INDEX", 1039 "FUNCTION", 1040 "NAMED COLLECTION", 1041 } 1042 1043 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1044 NON_NULLABLE_TYPES = { 1045 exp.DataType.Type.ARRAY, 1046 exp.DataType.Type.MAP, 1047 exp.DataType.Type.STRUCT, 1048 exp.DataType.Type.POINT, 1049 exp.DataType.Type.RING, 1050 exp.DataType.Type.LINESTRING, 1051 exp.DataType.Type.MULTILINESTRING, 1052 exp.DataType.Type.POLYGON, 1053 exp.DataType.Type.MULTIPOLYGON, 1054 } 1055 1056 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1057 strtodate_sql = self.function_fallback_sql(expression) 1058 1059 if not isinstance(expression.parent, exp.Cast): 1060 # StrToDate returns DATEs in other dialects (eg. postgres), so 1061 # this branch aims to improve the transpilation to clickhouse 1062 return f"CAST({strtodate_sql} AS DATE)" 1063 1064 return strtodate_sql 1065 1066 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1067 this = expression.this 1068 1069 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1070 return self.sql(this) 1071 1072 return super().cast_sql(expression, safe_prefix=safe_prefix) 1073 1074 def trycast_sql(self, expression: exp.TryCast) -> str: 1075 dtype = expression.to 1076 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1077 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1078 dtype.set("nullable", True) 1079 1080 return super().cast_sql(expression) 1081 1082 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1083 this = self.json_path_part(expression.this) 1084 return str(int(this) + 1) if is_int(this) else this 1085 1086 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1087 return f"AS {self.sql(expression, 'this')}" 1088 1089 def _any_to_has( 1090 self, 1091 expression: exp.EQ | exp.NEQ, 1092 default: t.Callable[[t.Any], str], 1093 prefix: str = "", 1094 ) -> str: 1095 if isinstance(expression.left, exp.Any): 1096 arr = expression.left 1097 this = expression.right 1098 elif isinstance(expression.right, exp.Any): 1099 arr = expression.right 1100 this = expression.left 1101 else: 1102 return default(expression) 1103 1104 return prefix + self.func("has", arr.this.unnest(), this) 1105 1106 def eq_sql(self, expression: exp.EQ) -> str: 1107 return self._any_to_has(expression, super().eq_sql) 1108 1109 def neq_sql(self, expression: exp.NEQ) -> str: 1110 return self._any_to_has(expression, super().neq_sql, "NOT ") 1111 1112 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1113 # Manually add a flag to make the search case-insensitive 1114 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1115 return self.func("match", expression.this, regex) 1116 1117 def datatype_sql(self, expression: exp.DataType) -> str: 1118 # String is the standard ClickHouse type, every other variant is just an alias. 1119 # Additionally, any supplied length parameter will be ignored. 1120 # 1121 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1122 if expression.this in self.STRING_TYPE_MAPPING: 1123 dtype = "String" 1124 else: 1125 dtype = super().datatype_sql(expression) 1126 1127 # This section changes the type to `Nullable(...)` if the following conditions hold: 1128 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1129 # and change their semantics 1130 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1131 # constraint: "Type of Map key must be a type, that can be represented by integer or 1132 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1133 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1134 parent = expression.parent 1135 nullable = expression.args.get("nullable") 1136 if nullable is True or ( 1137 nullable is None 1138 and not ( 1139 isinstance(parent, exp.DataType) 1140 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1141 and expression.index in (None, 0) 1142 ) 1143 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1144 ): 1145 dtype = f"Nullable({dtype})" 1146 1147 return dtype 1148 1149 def cte_sql(self, expression: exp.CTE) -> str: 1150 if expression.args.get("scalar"): 1151 this = self.sql(expression, "this") 1152 alias = self.sql(expression, "alias") 1153 return f"{this} AS {alias}" 1154 1155 return super().cte_sql(expression) 1156 1157 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1158 return super().after_limit_modifiers(expression) + [ 1159 ( 1160 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1161 if expression.args.get("settings") 1162 else "" 1163 ), 1164 ( 1165 self.seg("FORMAT ") + self.sql(expression, "format") 1166 if expression.args.get("format") 1167 else "" 1168 ), 1169 ] 1170 1171 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1172 params = self.expressions(expression, key="params", flat=True) 1173 return self.func(expression.name, *expression.expressions) + f"({params})" 1174 1175 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1176 return self.func(expression.name, *expression.expressions) 1177 1178 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1179 return self.anonymousaggfunc_sql(expression) 1180 1181 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1182 return self.parameterizedagg_sql(expression) 1183 1184 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1185 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1186 1187 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1188 return f"ON CLUSTER {self.sql(expression, 'this')}" 1189 1190 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1191 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1192 exp.Properties.Location.POST_NAME 1193 ): 1194 this_name = self.sql( 1195 expression.this if isinstance(expression.this, exp.Schema) else expression, 1196 "this", 1197 ) 1198 this_properties = " ".join( 1199 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1200 ) 1201 this_schema = self.schema_columns_sql(expression.this) 1202 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1203 1204 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1205 1206 return super().createable_sql(expression, locations) 1207 1208 def create_sql(self, expression: exp.Create) -> str: 1209 # The comment property comes last in CTAS statements, i.e. after the query 1210 query = expression.expression 1211 if isinstance(query, exp.Query): 1212 comment_prop = expression.find(exp.SchemaCommentProperty) 1213 if comment_prop: 1214 comment_prop.pop() 1215 query.replace(exp.paren(query)) 1216 else: 1217 comment_prop = None 1218 1219 create_sql = super().create_sql(expression) 1220 1221 comment_sql = self.sql(comment_prop) 1222 comment_sql = f" {comment_sql}" if comment_sql else "" 1223 1224 return f"{create_sql}{comment_sql}" 1225 1226 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1227 this = self.indent(self.sql(expression, "this")) 1228 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1229 1230 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1231 this = self.sql(expression, "this") 1232 this = f" {this}" if this else "" 1233 expr = self.sql(expression, "expression") 1234 expr = f" {expr}" if expr else "" 1235 index_type = self.sql(expression, "index_type") 1236 index_type = f" TYPE {index_type}" if index_type else "" 1237 granularity = self.sql(expression, "granularity") 1238 granularity = f" GRANULARITY {granularity}" if granularity else "" 1239 1240 return f"INDEX{this}{expr}{index_type}{granularity}" 1241 1242 def partition_sql(self, expression: exp.Partition) -> str: 1243 return f"PARTITION {self.expressions(expression, flat=True)}" 1244 1245 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1246 return f"ID {self.sql(expression.this)}" 1247 1248 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1249 return ( 1250 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1251 ) 1252 1253 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1254 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1255 1256 def is_sql(self, expression: exp.Is) -> str: 1257 is_sql = super().is_sql(expression) 1258 1259 if isinstance(expression.parent, exp.Not): 1260 # value IS NOT NULL -> NOT (value IS NULL) 1261 is_sql = self.wrap(is_sql) 1262 1263 return is_sql
162class ClickHouse(Dialect): 163 NORMALIZE_FUNCTIONS: bool | str = False 164 NULL_ORDERING = "nulls_are_last" 165 SUPPORTS_USER_DEFINED_TYPES = False 166 SAFE_DIVISION = True 167 LOG_BASE_FIRST: t.Optional[bool] = None 168 FORCE_EARLY_ALIAS_REF_EXPANSION = True 169 PRESERVE_ORIGINAL_NAMES = True 170 171 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 172 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 173 174 UNESCAPED_SEQUENCES = { 175 "\\0": "\0", 176 } 177 178 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 179 180 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 181 exp.Except: False, 182 exp.Intersect: False, 183 exp.Union: None, 184 } 185 186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 } 239 240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 index = self._index 570 571 this = self._parse_id_var() 572 self._match(TokenType.COLON) 573 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 574 self._match_text_seq("IDENTIFIER") and "Identifier" 575 ) 576 577 if not kind: 578 self._retreat(index) 579 return None 580 elif not self._match(TokenType.R_BRACE): 581 self.raise_error("Expecting }") 582 583 return self.expression(exp.Placeholder, this=this, kind=kind) 584 585 def _parse_bracket( 586 self, this: t.Optional[exp.Expression] = None 587 ) -> t.Optional[exp.Expression]: 588 l_brace = self._match(TokenType.L_BRACE, advance=False) 589 bracket = super()._parse_bracket(this) 590 591 if l_brace and isinstance(bracket, exp.Struct): 592 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 593 for expression in bracket.expressions: 594 if not isinstance(expression, exp.PropertyEQ): 595 break 596 597 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 598 varmap.args["values"].append("expressions", expression.expression) 599 600 return varmap 601 602 return bracket 603 604 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 605 this = super()._parse_in(this) 606 this.set("is_global", is_global) 607 return this 608 609 def _parse_table( 610 self, 611 schema: bool = False, 612 joins: bool = False, 613 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 614 parse_bracket: bool = False, 615 is_db_reference: bool = False, 616 parse_partition: bool = False, 617 ) -> t.Optional[exp.Expression]: 618 this = super()._parse_table( 619 schema=schema, 620 joins=joins, 621 alias_tokens=alias_tokens, 622 parse_bracket=parse_bracket, 623 is_db_reference=is_db_reference, 624 ) 625 626 if self._match(TokenType.FINAL): 627 this = self.expression(exp.Final, this=this) 628 629 return this 630 631 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 632 return super()._parse_position(haystack_first=True) 633 634 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 635 def _parse_cte(self) -> exp.CTE: 636 # WITH <identifier> AS <subquery expression> 637 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 638 639 if not cte: 640 # WITH <expression> AS <identifier> 641 cte = self.expression( 642 exp.CTE, 643 this=self._parse_assignment(), 644 alias=self._parse_table_alias(), 645 scalar=True, 646 ) 647 648 return cte 649 650 def _parse_join_parts( 651 self, 652 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 653 is_global = self._match(TokenType.GLOBAL) and self._prev 654 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 655 656 if kind_pre: 657 kind = self._match_set(self.JOIN_KINDS) and self._prev 658 side = self._match_set(self.JOIN_SIDES) and self._prev 659 return is_global, side, kind 660 661 return ( 662 is_global, 663 self._match_set(self.JOIN_SIDES) and self._prev, 664 self._match_set(self.JOIN_KINDS) and self._prev, 665 ) 666 667 def _parse_join( 668 self, skip_join_token: bool = False, parse_bracket: bool = False 669 ) -> t.Optional[exp.Join]: 670 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 671 if join: 672 join.set("global", join.args.pop("method", None)) 673 674 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 675 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 676 if join.kind == "ARRAY": 677 for table in join.find_all(exp.Table): 678 table.replace(table.to_column()) 679 680 return join 681 682 def _parse_function( 683 self, 684 functions: t.Optional[t.Dict[str, t.Callable]] = None, 685 anonymous: bool = False, 686 optional_parens: bool = True, 687 any_token: bool = False, 688 ) -> t.Optional[exp.Expression]: 689 expr = super()._parse_function( 690 functions=functions, 691 anonymous=anonymous, 692 optional_parens=optional_parens, 693 any_token=any_token, 694 ) 695 696 func = expr.this if isinstance(expr, exp.Window) else expr 697 698 # Aggregate functions can be split in 2 parts: <func_name><suffix> 699 parts = ( 700 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 701 ) 702 703 if parts: 704 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 705 params = self._parse_func_params(anon_func) 706 707 kwargs = { 708 "this": anon_func.this, 709 "expressions": anon_func.expressions, 710 } 711 if parts[1]: 712 kwargs["parts"] = parts 713 exp_class: t.Type[exp.Expression] = ( 714 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 715 ) 716 else: 717 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 718 719 kwargs["exp_class"] = exp_class 720 if params: 721 kwargs["params"] = params 722 723 func = self.expression(**kwargs) 724 725 if isinstance(expr, exp.Window): 726 # The window's func was parsed as Anonymous in base parser, fix its 727 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 728 expr.set("this", func) 729 elif params: 730 # Params have blocked super()._parse_function() from parsing the following window 731 # (if that exists) as they're standing between the function call and the window spec 732 expr = self._parse_window(func) 733 else: 734 expr = func 735 736 return expr 737 738 def _parse_func_params( 739 self, this: t.Optional[exp.Func] = None 740 ) -> t.Optional[t.List[exp.Expression]]: 741 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 742 return self._parse_csv(self._parse_lambda) 743 744 if self._match(TokenType.L_PAREN): 745 params = self._parse_csv(self._parse_lambda) 746 self._match_r_paren(this) 747 return params 748 749 return None 750 751 def _parse_quantile(self) -> exp.Quantile: 752 this = self._parse_lambda() 753 params = self._parse_func_params() 754 if params: 755 return self.expression(exp.Quantile, this=params[0], quantile=this) 756 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 757 758 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 759 return super()._parse_wrapped_id_vars(optional=True) 760 761 def _parse_primary_key( 762 self, wrapped_optional: bool = False, in_props: bool = False 763 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 764 return super()._parse_primary_key( 765 wrapped_optional=wrapped_optional or in_props, in_props=in_props 766 ) 767 768 def _parse_on_property(self) -> t.Optional[exp.Expression]: 769 index = self._index 770 if self._match_text_seq("CLUSTER"): 771 this = self._parse_id_var() 772 if this: 773 return self.expression(exp.OnCluster, this=this) 774 else: 775 self._retreat(index) 776 return None 777 778 def _parse_index_constraint( 779 self, kind: t.Optional[str] = None 780 ) -> exp.IndexColumnConstraint: 781 # INDEX name1 expr TYPE type1(args) GRANULARITY value 782 this = self._parse_id_var() 783 expression = self._parse_assignment() 784 785 index_type = self._match_text_seq("TYPE") and ( 786 self._parse_function() or self._parse_var() 787 ) 788 789 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 790 791 return self.expression( 792 exp.IndexColumnConstraint, 793 this=this, 794 expression=expression, 795 index_type=index_type, 796 granularity=granularity, 797 ) 798 799 def _parse_partition(self) -> t.Optional[exp.Partition]: 800 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 801 if not self._match(TokenType.PARTITION): 802 return None 803 804 if self._match_text_seq("ID"): 805 # Corresponds to the PARTITION ID <string_value> syntax 806 expressions: t.List[exp.Expression] = [ 807 self.expression(exp.PartitionId, this=self._parse_string()) 808 ] 809 else: 810 expressions = self._parse_expressions() 811 812 return self.expression(exp.Partition, expressions=expressions) 813 814 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 815 partition = self._parse_partition() 816 817 if not partition or not self._match(TokenType.FROM): 818 return None 819 820 return self.expression( 821 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 822 ) 823 824 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 825 if not self._match_text_seq("PROJECTION"): 826 return None 827 828 return self.expression( 829 exp.ProjectionDef, 830 this=self._parse_id_var(), 831 expression=self._parse_wrapped(self._parse_statement), 832 ) 833 834 def _parse_constraint(self) -> t.Optional[exp.Expression]: 835 return super()._parse_constraint() or self._parse_projection_def() 836 837 def _parse_alias( 838 self, this: t.Optional[exp.Expression], explicit: bool = False 839 ) -> t.Optional[exp.Expression]: 840 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 841 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 842 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 843 return this 844 845 return super()._parse_alias(this=this, explicit=explicit) 846 847 def _parse_expression(self) -> t.Optional[exp.Expression]: 848 this = super()._parse_expression() 849 850 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 851 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 852 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 853 self._match(TokenType.R_PAREN) 854 855 return this 856 857 def _parse_columns(self) -> exp.Expression: 858 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 859 860 while self._next and self._match_text_seq(")", "APPLY", "("): 861 self._match(TokenType.R_PAREN) 862 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 863 return this 864 865 class Generator(generator.Generator): 866 QUERY_HINTS = False 867 STRUCT_DELIMITER = ("(", ")") 868 NVL2_SUPPORTED = False 869 TABLESAMPLE_REQUIRES_PARENS = False 870 TABLESAMPLE_SIZE_IS_ROWS = False 871 TABLESAMPLE_KEYWORDS = "SAMPLE" 872 LAST_DAY_SUPPORTS_DATE_PART = False 873 CAN_IMPLEMENT_ARRAY_ANY = True 874 SUPPORTS_TO_NUMBER = False 875 JOIN_HINTS = False 876 TABLE_HINTS = False 877 GROUPINGS_SEP = "" 878 SET_OP_MODIFIERS = False 879 SUPPORTS_TABLE_ALIAS_COLUMNS = False 880 VALUES_AS_TABLE = False 881 ARRAY_SIZE_NAME = "LENGTH" 882 883 STRING_TYPE_MAPPING = { 884 exp.DataType.Type.CHAR: "String", 885 exp.DataType.Type.LONGBLOB: "String", 886 exp.DataType.Type.LONGTEXT: "String", 887 exp.DataType.Type.MEDIUMBLOB: "String", 888 exp.DataType.Type.MEDIUMTEXT: "String", 889 exp.DataType.Type.TINYBLOB: "String", 890 exp.DataType.Type.TINYTEXT: "String", 891 exp.DataType.Type.TEXT: "String", 892 exp.DataType.Type.VARBINARY: "String", 893 exp.DataType.Type.VARCHAR: "String", 894 } 895 896 SUPPORTED_JSON_PATH_PARTS = { 897 exp.JSONPathKey, 898 exp.JSONPathRoot, 899 exp.JSONPathSubscript, 900 } 901 902 TYPE_MAPPING = { 903 **generator.Generator.TYPE_MAPPING, 904 **STRING_TYPE_MAPPING, 905 exp.DataType.Type.ARRAY: "Array", 906 exp.DataType.Type.BOOLEAN: "Bool", 907 exp.DataType.Type.BIGINT: "Int64", 908 exp.DataType.Type.DATE32: "Date32", 909 exp.DataType.Type.DATETIME: "DateTime", 910 exp.DataType.Type.DATETIME2: "DateTime", 911 exp.DataType.Type.SMALLDATETIME: "DateTime", 912 exp.DataType.Type.DATETIME64: "DateTime64", 913 exp.DataType.Type.DECIMAL: "Decimal", 914 exp.DataType.Type.DECIMAL32: "Decimal32", 915 exp.DataType.Type.DECIMAL64: "Decimal64", 916 exp.DataType.Type.DECIMAL128: "Decimal128", 917 exp.DataType.Type.DECIMAL256: "Decimal256", 918 exp.DataType.Type.TIMESTAMP: "DateTime", 919 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 920 exp.DataType.Type.DOUBLE: "Float64", 921 exp.DataType.Type.ENUM: "Enum", 922 exp.DataType.Type.ENUM8: "Enum8", 923 exp.DataType.Type.ENUM16: "Enum16", 924 exp.DataType.Type.FIXEDSTRING: "FixedString", 925 exp.DataType.Type.FLOAT: "Float32", 926 exp.DataType.Type.INT: "Int32", 927 exp.DataType.Type.MEDIUMINT: "Int32", 928 exp.DataType.Type.INT128: "Int128", 929 exp.DataType.Type.INT256: "Int256", 930 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 931 exp.DataType.Type.MAP: "Map", 932 exp.DataType.Type.NESTED: "Nested", 933 exp.DataType.Type.SMALLINT: "Int16", 934 exp.DataType.Type.STRUCT: "Tuple", 935 exp.DataType.Type.TINYINT: "Int8", 936 exp.DataType.Type.UBIGINT: "UInt64", 937 exp.DataType.Type.UINT: "UInt32", 938 exp.DataType.Type.UINT128: "UInt128", 939 exp.DataType.Type.UINT256: "UInt256", 940 exp.DataType.Type.USMALLINT: "UInt16", 941 exp.DataType.Type.UTINYINT: "UInt8", 942 exp.DataType.Type.IPV4: "IPv4", 943 exp.DataType.Type.IPV6: "IPv6", 944 exp.DataType.Type.POINT: "Point", 945 exp.DataType.Type.RING: "Ring", 946 exp.DataType.Type.LINESTRING: "LineString", 947 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 948 exp.DataType.Type.POLYGON: "Polygon", 949 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 950 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 951 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 952 } 953 954 TRANSFORMS = { 955 **generator.Generator.TRANSFORMS, 956 exp.AnyValue: rename_func("any"), 957 exp.ApproxDistinct: rename_func("uniq"), 958 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 959 exp.ArraySum: rename_func("arraySum"), 960 exp.ArgMax: arg_max_or_min_no_count("argMax"), 961 exp.ArgMin: arg_max_or_min_no_count("argMin"), 962 exp.Array: inline_array_sql, 963 exp.CastToStrType: rename_func("CAST"), 964 exp.CountIf: rename_func("countIf"), 965 exp.CompressColumnConstraint: lambda self, 966 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 967 exp.ComputedColumnConstraint: lambda self, 968 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 969 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 970 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 971 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 972 exp.DateStrToDate: rename_func("toDate"), 973 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 974 exp.Explode: rename_func("arrayJoin"), 975 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 976 exp.IsNan: rename_func("isNaN"), 977 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 978 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 979 exp.JSONPathKey: json_path_key_only_name, 980 exp.JSONPathRoot: lambda *_: "", 981 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 982 exp.Median: rename_func("median"), 983 exp.Nullif: rename_func("nullIf"), 984 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 985 exp.Pivot: no_pivot_sql, 986 exp.Quantile: _quantile_sql, 987 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 988 exp.Rand: rename_func("randCanonical"), 989 exp.StartsWith: rename_func("startsWith"), 990 exp.StrPosition: lambda self, e: self.func( 991 "position", e.this, e.args.get("substr"), e.args.get("position") 992 ), 993 exp.TimeToStr: lambda self, e: self.func( 994 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 995 ), 996 exp.TimeStrToTime: _timestrtotime_sql, 997 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 998 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 999 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1000 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1001 exp.MD5Digest: rename_func("MD5"), 1002 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1003 exp.SHA: rename_func("SHA1"), 1004 exp.SHA2: sha256_sql, 1005 exp.UnixToTime: _unix_to_time_sql, 1006 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1007 exp.Trim: trim_sql, 1008 exp.Variance: rename_func("varSamp"), 1009 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1010 exp.Stddev: rename_func("stddevSamp"), 1011 exp.Chr: rename_func("CHAR"), 1012 exp.Lag: lambda self, e: self.func( 1013 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1014 ), 1015 exp.Lead: lambda self, e: self.func( 1016 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1017 ), 1018 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1019 rename_func("editDistance") 1020 ), 1021 } 1022 1023 PROPERTIES_LOCATION = { 1024 **generator.Generator.PROPERTIES_LOCATION, 1025 exp.OnCluster: exp.Properties.Location.POST_NAME, 1026 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1027 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1028 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1029 } 1030 1031 # There's no list in docs, but it can be found in Clickhouse code 1032 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1033 ON_CLUSTER_TARGETS = { 1034 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1035 "DATABASE", 1036 "TABLE", 1037 "VIEW", 1038 "DICTIONARY", 1039 "INDEX", 1040 "FUNCTION", 1041 "NAMED COLLECTION", 1042 } 1043 1044 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1045 NON_NULLABLE_TYPES = { 1046 exp.DataType.Type.ARRAY, 1047 exp.DataType.Type.MAP, 1048 exp.DataType.Type.STRUCT, 1049 exp.DataType.Type.POINT, 1050 exp.DataType.Type.RING, 1051 exp.DataType.Type.LINESTRING, 1052 exp.DataType.Type.MULTILINESTRING, 1053 exp.DataType.Type.POLYGON, 1054 exp.DataType.Type.MULTIPOLYGON, 1055 } 1056 1057 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1058 strtodate_sql = self.function_fallback_sql(expression) 1059 1060 if not isinstance(expression.parent, exp.Cast): 1061 # StrToDate returns DATEs in other dialects (eg. postgres), so 1062 # this branch aims to improve the transpilation to clickhouse 1063 return f"CAST({strtodate_sql} AS DATE)" 1064 1065 return strtodate_sql 1066 1067 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1068 this = expression.this 1069 1070 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1071 return self.sql(this) 1072 1073 return super().cast_sql(expression, safe_prefix=safe_prefix) 1074 1075 def trycast_sql(self, expression: exp.TryCast) -> str: 1076 dtype = expression.to 1077 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1078 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1079 dtype.set("nullable", True) 1080 1081 return super().cast_sql(expression) 1082 1083 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1084 this = self.json_path_part(expression.this) 1085 return str(int(this) + 1) if is_int(this) else this 1086 1087 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1088 return f"AS {self.sql(expression, 'this')}" 1089 1090 def _any_to_has( 1091 self, 1092 expression: exp.EQ | exp.NEQ, 1093 default: t.Callable[[t.Any], str], 1094 prefix: str = "", 1095 ) -> str: 1096 if isinstance(expression.left, exp.Any): 1097 arr = expression.left 1098 this = expression.right 1099 elif isinstance(expression.right, exp.Any): 1100 arr = expression.right 1101 this = expression.left 1102 else: 1103 return default(expression) 1104 1105 return prefix + self.func("has", arr.this.unnest(), this) 1106 1107 def eq_sql(self, expression: exp.EQ) -> str: 1108 return self._any_to_has(expression, super().eq_sql) 1109 1110 def neq_sql(self, expression: exp.NEQ) -> str: 1111 return self._any_to_has(expression, super().neq_sql, "NOT ") 1112 1113 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1114 # Manually add a flag to make the search case-insensitive 1115 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1116 return self.func("match", expression.this, regex) 1117 1118 def datatype_sql(self, expression: exp.DataType) -> str: 1119 # String is the standard ClickHouse type, every other variant is just an alias. 1120 # Additionally, any supplied length parameter will be ignored. 1121 # 1122 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1123 if expression.this in self.STRING_TYPE_MAPPING: 1124 dtype = "String" 1125 else: 1126 dtype = super().datatype_sql(expression) 1127 1128 # This section changes the type to `Nullable(...)` if the following conditions hold: 1129 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1130 # and change their semantics 1131 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1132 # constraint: "Type of Map key must be a type, that can be represented by integer or 1133 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1134 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1135 parent = expression.parent 1136 nullable = expression.args.get("nullable") 1137 if nullable is True or ( 1138 nullable is None 1139 and not ( 1140 isinstance(parent, exp.DataType) 1141 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1142 and expression.index in (None, 0) 1143 ) 1144 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1145 ): 1146 dtype = f"Nullable({dtype})" 1147 1148 return dtype 1149 1150 def cte_sql(self, expression: exp.CTE) -> str: 1151 if expression.args.get("scalar"): 1152 this = self.sql(expression, "this") 1153 alias = self.sql(expression, "alias") 1154 return f"{this} AS {alias}" 1155 1156 return super().cte_sql(expression) 1157 1158 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1159 return super().after_limit_modifiers(expression) + [ 1160 ( 1161 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1162 if expression.args.get("settings") 1163 else "" 1164 ), 1165 ( 1166 self.seg("FORMAT ") + self.sql(expression, "format") 1167 if expression.args.get("format") 1168 else "" 1169 ), 1170 ] 1171 1172 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1173 params = self.expressions(expression, key="params", flat=True) 1174 return self.func(expression.name, *expression.expressions) + f"({params})" 1175 1176 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1177 return self.func(expression.name, *expression.expressions) 1178 1179 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1180 return self.anonymousaggfunc_sql(expression) 1181 1182 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1183 return self.parameterizedagg_sql(expression) 1184 1185 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1186 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1187 1188 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1189 return f"ON CLUSTER {self.sql(expression, 'this')}" 1190 1191 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1192 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1193 exp.Properties.Location.POST_NAME 1194 ): 1195 this_name = self.sql( 1196 expression.this if isinstance(expression.this, exp.Schema) else expression, 1197 "this", 1198 ) 1199 this_properties = " ".join( 1200 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1201 ) 1202 this_schema = self.schema_columns_sql(expression.this) 1203 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1204 1205 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1206 1207 return super().createable_sql(expression, locations) 1208 1209 def create_sql(self, expression: exp.Create) -> str: 1210 # The comment property comes last in CTAS statements, i.e. after the query 1211 query = expression.expression 1212 if isinstance(query, exp.Query): 1213 comment_prop = expression.find(exp.SchemaCommentProperty) 1214 if comment_prop: 1215 comment_prop.pop() 1216 query.replace(exp.paren(query)) 1217 else: 1218 comment_prop = None 1219 1220 create_sql = super().create_sql(expression) 1221 1222 comment_sql = self.sql(comment_prop) 1223 comment_sql = f" {comment_sql}" if comment_sql else "" 1224 1225 return f"{create_sql}{comment_sql}" 1226 1227 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1228 this = self.indent(self.sql(expression, "this")) 1229 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1230 1231 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1232 this = self.sql(expression, "this") 1233 this = f" {this}" if this else "" 1234 expr = self.sql(expression, "expression") 1235 expr = f" {expr}" if expr else "" 1236 index_type = self.sql(expression, "index_type") 1237 index_type = f" TYPE {index_type}" if index_type else "" 1238 granularity = self.sql(expression, "granularity") 1239 granularity = f" GRANULARITY {granularity}" if granularity else "" 1240 1241 return f"INDEX{this}{expr}{index_type}{granularity}" 1242 1243 def partition_sql(self, expression: exp.Partition) -> str: 1244 return f"PARTITION {self.expressions(expression, flat=True)}" 1245 1246 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1247 return f"ID {self.sql(expression.this)}" 1248 1249 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1250 return ( 1251 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1252 ) 1253 1254 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1255 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1256 1257 def is_sql(self, expression: exp.Is) -> str: 1258 is_sql = super().is_sql(expression) 1259 1260 if isinstance(expression.parent, exp.Not): 1261 # value IS NOT NULL -> NOT (value IS NULL) 1262 is_sql = self.wrap(is_sql) 1263 1264 return is_sql
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Whether the name of the function should be preserved inside the node's metadata, can be useful for roundtripping deprecated vs new functions that share an AST node e.g JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- PROMOTE_TO_INFERRED_DATETIME_TYPE
- SUPPORTS_VALUES_DEFAULT
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
186 class Tokenizer(tokens.Tokenizer): 187 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 188 IDENTIFIERS = ['"', "`"] 189 IDENTIFIER_ESCAPES = ["\\"] 190 STRING_ESCAPES = ["'", "\\"] 191 BIT_STRINGS = [("0b", "")] 192 HEX_STRINGS = [("0x", ""), ("0X", "")] 193 HEREDOC_STRINGS = ["$"] 194 195 KEYWORDS = { 196 **tokens.Tokenizer.KEYWORDS, 197 "ATTACH": TokenType.COMMAND, 198 "DATE32": TokenType.DATE32, 199 "DATETIME64": TokenType.DATETIME64, 200 "DICTIONARY": TokenType.DICTIONARY, 201 "ENUM8": TokenType.ENUM8, 202 "ENUM16": TokenType.ENUM16, 203 "FINAL": TokenType.FINAL, 204 "FIXEDSTRING": TokenType.FIXEDSTRING, 205 "FLOAT32": TokenType.FLOAT, 206 "FLOAT64": TokenType.DOUBLE, 207 "GLOBAL": TokenType.GLOBAL, 208 "INT256": TokenType.INT256, 209 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 210 "MAP": TokenType.MAP, 211 "NESTED": TokenType.NESTED, 212 "SAMPLE": TokenType.TABLE_SAMPLE, 213 "TUPLE": TokenType.STRUCT, 214 "UINT128": TokenType.UINT128, 215 "UINT16": TokenType.USMALLINT, 216 "UINT256": TokenType.UINT256, 217 "UINT32": TokenType.UINT, 218 "UINT64": TokenType.UBIGINT, 219 "UINT8": TokenType.UTINYINT, 220 "IPV4": TokenType.IPV4, 221 "IPV6": TokenType.IPV6, 222 "POINT": TokenType.POINT, 223 "RING": TokenType.RING, 224 "LINESTRING": TokenType.LINESTRING, 225 "MULTILINESTRING": TokenType.MULTILINESTRING, 226 "POLYGON": TokenType.POLYGON, 227 "MULTIPOLYGON": TokenType.MULTIPOLYGON, 228 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 229 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 230 "SYSTEM": TokenType.COMMAND, 231 "PREWHERE": TokenType.PREWHERE, 232 } 233 KEYWORDS.pop("/*+") 234 235 SINGLE_TOKENS = { 236 **tokens.Tokenizer.SINGLE_TOKENS, 237 "$": TokenType.HEREDOC_STRING, 238 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
240 class Parser(parser.Parser): 241 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 242 # * select x from t1 union all select x from t2 limit 1; 243 # * select x from t1 union all (select x from t2 limit 1); 244 MODIFIERS_ATTACHED_TO_SET_OP = False 245 INTERVAL_SPANS = False 246 247 FUNCTIONS = { 248 **parser.Parser.FUNCTIONS, 249 "ANY": exp.AnyValue.from_arg_list, 250 "ARRAYSUM": exp.ArraySum.from_arg_list, 251 "COUNTIF": _build_count_if, 252 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 253 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 254 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 255 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 256 "DATE_FORMAT": _build_date_format, 257 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 258 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 259 "FORMATDATETIME": _build_date_format, 260 "JSONEXTRACTSTRING": build_json_extract_path( 261 exp.JSONExtractScalar, zero_based_indexing=False 262 ), 263 "MAP": parser.build_var_map, 264 "MATCH": exp.RegexpLike.from_arg_list, 265 "RANDCANONICAL": exp.Rand.from_arg_list, 266 "STR_TO_DATE": _build_str_to_date, 267 "TUPLE": exp.Struct.from_arg_list, 268 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 269 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 270 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 271 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 272 "UNIQ": exp.ApproxDistinct.from_arg_list, 273 "XOR": lambda args: exp.Xor(expressions=args), 274 "MD5": exp.MD5Digest.from_arg_list, 275 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 276 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 277 "EDITDISTANCE": exp.Levenshtein.from_arg_list, 278 "LEVENSHTEINDISTANCE": exp.Levenshtein.from_arg_list, 279 } 280 FUNCTIONS.pop("TRANSFORM") 281 282 AGG_FUNCTIONS = { 283 "count", 284 "min", 285 "max", 286 "sum", 287 "avg", 288 "any", 289 "stddevPop", 290 "stddevSamp", 291 "varPop", 292 "varSamp", 293 "corr", 294 "covarPop", 295 "covarSamp", 296 "entropy", 297 "exponentialMovingAverage", 298 "intervalLengthSum", 299 "kolmogorovSmirnovTest", 300 "mannWhitneyUTest", 301 "median", 302 "rankCorr", 303 "sumKahan", 304 "studentTTest", 305 "welchTTest", 306 "anyHeavy", 307 "anyLast", 308 "boundingRatio", 309 "first_value", 310 "last_value", 311 "argMin", 312 "argMax", 313 "avgWeighted", 314 "topK", 315 "topKWeighted", 316 "deltaSum", 317 "deltaSumTimestamp", 318 "groupArray", 319 "groupArrayLast", 320 "groupUniqArray", 321 "groupArrayInsertAt", 322 "groupArrayMovingAvg", 323 "groupArrayMovingSum", 324 "groupArraySample", 325 "groupBitAnd", 326 "groupBitOr", 327 "groupBitXor", 328 "groupBitmap", 329 "groupBitmapAnd", 330 "groupBitmapOr", 331 "groupBitmapXor", 332 "sumWithOverflow", 333 "sumMap", 334 "minMap", 335 "maxMap", 336 "skewSamp", 337 "skewPop", 338 "kurtSamp", 339 "kurtPop", 340 "uniq", 341 "uniqExact", 342 "uniqCombined", 343 "uniqCombined64", 344 "uniqHLL12", 345 "uniqTheta", 346 "quantile", 347 "quantiles", 348 "quantileExact", 349 "quantilesExact", 350 "quantileExactLow", 351 "quantilesExactLow", 352 "quantileExactHigh", 353 "quantilesExactHigh", 354 "quantileExactWeighted", 355 "quantilesExactWeighted", 356 "quantileTiming", 357 "quantilesTiming", 358 "quantileTimingWeighted", 359 "quantilesTimingWeighted", 360 "quantileDeterministic", 361 "quantilesDeterministic", 362 "quantileTDigest", 363 "quantilesTDigest", 364 "quantileTDigestWeighted", 365 "quantilesTDigestWeighted", 366 "quantileBFloat16", 367 "quantilesBFloat16", 368 "quantileBFloat16Weighted", 369 "quantilesBFloat16Weighted", 370 "simpleLinearRegression", 371 "stochasticLinearRegression", 372 "stochasticLogisticRegression", 373 "categoricalInformationValue", 374 "contingency", 375 "cramersV", 376 "cramersVBiasCorrected", 377 "theilsU", 378 "maxIntersections", 379 "maxIntersectionsPosition", 380 "meanZTest", 381 "quantileInterpolatedWeighted", 382 "quantilesInterpolatedWeighted", 383 "quantileGK", 384 "quantilesGK", 385 "sparkBar", 386 "sumCount", 387 "largestTriangleThreeBuckets", 388 "histogram", 389 "sequenceMatch", 390 "sequenceCount", 391 "windowFunnel", 392 "retention", 393 "uniqUpTo", 394 "sequenceNextNode", 395 "exponentialTimeDecayedAvg", 396 } 397 398 AGG_FUNCTIONS_SUFFIXES = [ 399 "If", 400 "Array", 401 "ArrayIf", 402 "Map", 403 "SimpleState", 404 "State", 405 "Merge", 406 "MergeState", 407 "ForEach", 408 "Distinct", 409 "OrDefault", 410 "OrNull", 411 "Resample", 412 "ArgMin", 413 "ArgMax", 414 ] 415 416 FUNC_TOKENS = { 417 *parser.Parser.FUNC_TOKENS, 418 TokenType.SET, 419 } 420 421 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 422 423 ID_VAR_TOKENS = { 424 *parser.Parser.ID_VAR_TOKENS, 425 TokenType.LIKE, 426 } 427 428 AGG_FUNC_MAPPING = ( 429 lambda functions, suffixes: { 430 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 431 } 432 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 433 434 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 435 436 FUNCTION_PARSERS = { 437 **parser.Parser.FUNCTION_PARSERS, 438 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 439 "QUANTILE": lambda self: self._parse_quantile(), 440 "MEDIAN": lambda self: self._parse_quantile(), 441 "COLUMNS": lambda self: self._parse_columns(), 442 } 443 444 FUNCTION_PARSERS.pop("MATCH") 445 446 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 447 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 448 449 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 450 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 451 452 RANGE_PARSERS = { 453 **parser.Parser.RANGE_PARSERS, 454 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 455 and self._parse_in(this, is_global=True), 456 } 457 458 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 459 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 460 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 461 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 462 463 JOIN_KINDS = { 464 *parser.Parser.JOIN_KINDS, 465 TokenType.ANY, 466 TokenType.ASOF, 467 TokenType.ARRAY, 468 } 469 470 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 471 TokenType.ANY, 472 TokenType.ARRAY, 473 TokenType.FINAL, 474 TokenType.FORMAT, 475 TokenType.SETTINGS, 476 } 477 478 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 479 TokenType.FORMAT, 480 } 481 482 LOG_DEFAULTS_TO_LN = True 483 484 QUERY_MODIFIER_PARSERS = { 485 **parser.Parser.QUERY_MODIFIER_PARSERS, 486 TokenType.SETTINGS: lambda self: ( 487 "settings", 488 self._advance() or self._parse_csv(self._parse_assignment), 489 ), 490 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 491 } 492 493 CONSTRAINT_PARSERS = { 494 **parser.Parser.CONSTRAINT_PARSERS, 495 "INDEX": lambda self: self._parse_index_constraint(), 496 "CODEC": lambda self: self._parse_compress(), 497 } 498 499 ALTER_PARSERS = { 500 **parser.Parser.ALTER_PARSERS, 501 "REPLACE": lambda self: self._parse_alter_table_replace(), 502 } 503 504 SCHEMA_UNNAMED_CONSTRAINTS = { 505 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 506 "INDEX", 507 } 508 509 PLACEHOLDER_PARSERS = { 510 **parser.Parser.PLACEHOLDER_PARSERS, 511 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 512 } 513 514 # https://clickhouse.com/docs/en/sql-reference/statements/create/function 515 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 516 return self._parse_lambda() 517 518 def _parse_types( 519 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 520 ) -> t.Optional[exp.Expression]: 521 dtype = super()._parse_types( 522 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 523 ) 524 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 525 # Mark every type as non-nullable which is ClickHouse's default, unless it's 526 # already marked as nullable. This marker helps us transpile types from other 527 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 528 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 529 # fail in ClickHouse without the `Nullable` type constructor. 530 dtype.set("nullable", False) 531 532 return dtype 533 534 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 535 index = self._index 536 this = self._parse_bitwise() 537 if self._match(TokenType.FROM): 538 self._retreat(index) 539 return super()._parse_extract() 540 541 # We return Anonymous here because extract and regexpExtract have different semantics, 542 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 543 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 544 # 545 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 546 self._match(TokenType.COMMA) 547 return self.expression( 548 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 549 ) 550 551 def _parse_assignment(self) -> t.Optional[exp.Expression]: 552 this = super()._parse_assignment() 553 554 if self._match(TokenType.PLACEHOLDER): 555 return self.expression( 556 exp.If, 557 this=this, 558 true=self._parse_assignment(), 559 false=self._match(TokenType.COLON) and self._parse_assignment(), 560 ) 561 562 return this 563 564 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 565 """ 566 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 567 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 568 """ 569 index = self._index 570 571 this = self._parse_id_var() 572 self._match(TokenType.COLON) 573 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 574 self._match_text_seq("IDENTIFIER") and "Identifier" 575 ) 576 577 if not kind: 578 self._retreat(index) 579 return None 580 elif not self._match(TokenType.R_BRACE): 581 self.raise_error("Expecting }") 582 583 return self.expression(exp.Placeholder, this=this, kind=kind) 584 585 def _parse_bracket( 586 self, this: t.Optional[exp.Expression] = None 587 ) -> t.Optional[exp.Expression]: 588 l_brace = self._match(TokenType.L_BRACE, advance=False) 589 bracket = super()._parse_bracket(this) 590 591 if l_brace and isinstance(bracket, exp.Struct): 592 varmap = exp.VarMap(keys=exp.Array(), values=exp.Array()) 593 for expression in bracket.expressions: 594 if not isinstance(expression, exp.PropertyEQ): 595 break 596 597 varmap.args["keys"].append("expressions", exp.Literal.string(expression.name)) 598 varmap.args["values"].append("expressions", expression.expression) 599 600 return varmap 601 602 return bracket 603 604 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 605 this = super()._parse_in(this) 606 this.set("is_global", is_global) 607 return this 608 609 def _parse_table( 610 self, 611 schema: bool = False, 612 joins: bool = False, 613 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 614 parse_bracket: bool = False, 615 is_db_reference: bool = False, 616 parse_partition: bool = False, 617 ) -> t.Optional[exp.Expression]: 618 this = super()._parse_table( 619 schema=schema, 620 joins=joins, 621 alias_tokens=alias_tokens, 622 parse_bracket=parse_bracket, 623 is_db_reference=is_db_reference, 624 ) 625 626 if self._match(TokenType.FINAL): 627 this = self.expression(exp.Final, this=this) 628 629 return this 630 631 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 632 return super()._parse_position(haystack_first=True) 633 634 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 635 def _parse_cte(self) -> exp.CTE: 636 # WITH <identifier> AS <subquery expression> 637 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 638 639 if not cte: 640 # WITH <expression> AS <identifier> 641 cte = self.expression( 642 exp.CTE, 643 this=self._parse_assignment(), 644 alias=self._parse_table_alias(), 645 scalar=True, 646 ) 647 648 return cte 649 650 def _parse_join_parts( 651 self, 652 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 653 is_global = self._match(TokenType.GLOBAL) and self._prev 654 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 655 656 if kind_pre: 657 kind = self._match_set(self.JOIN_KINDS) and self._prev 658 side = self._match_set(self.JOIN_SIDES) and self._prev 659 return is_global, side, kind 660 661 return ( 662 is_global, 663 self._match_set(self.JOIN_SIDES) and self._prev, 664 self._match_set(self.JOIN_KINDS) and self._prev, 665 ) 666 667 def _parse_join( 668 self, skip_join_token: bool = False, parse_bracket: bool = False 669 ) -> t.Optional[exp.Join]: 670 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 671 if join: 672 join.set("global", join.args.pop("method", None)) 673 674 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 675 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 676 if join.kind == "ARRAY": 677 for table in join.find_all(exp.Table): 678 table.replace(table.to_column()) 679 680 return join 681 682 def _parse_function( 683 self, 684 functions: t.Optional[t.Dict[str, t.Callable]] = None, 685 anonymous: bool = False, 686 optional_parens: bool = True, 687 any_token: bool = False, 688 ) -> t.Optional[exp.Expression]: 689 expr = super()._parse_function( 690 functions=functions, 691 anonymous=anonymous, 692 optional_parens=optional_parens, 693 any_token=any_token, 694 ) 695 696 func = expr.this if isinstance(expr, exp.Window) else expr 697 698 # Aggregate functions can be split in 2 parts: <func_name><suffix> 699 parts = ( 700 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 701 ) 702 703 if parts: 704 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 705 params = self._parse_func_params(anon_func) 706 707 kwargs = { 708 "this": anon_func.this, 709 "expressions": anon_func.expressions, 710 } 711 if parts[1]: 712 kwargs["parts"] = parts 713 exp_class: t.Type[exp.Expression] = ( 714 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 715 ) 716 else: 717 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 718 719 kwargs["exp_class"] = exp_class 720 if params: 721 kwargs["params"] = params 722 723 func = self.expression(**kwargs) 724 725 if isinstance(expr, exp.Window): 726 # The window's func was parsed as Anonymous in base parser, fix its 727 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 728 expr.set("this", func) 729 elif params: 730 # Params have blocked super()._parse_function() from parsing the following window 731 # (if that exists) as they're standing between the function call and the window spec 732 expr = self._parse_window(func) 733 else: 734 expr = func 735 736 return expr 737 738 def _parse_func_params( 739 self, this: t.Optional[exp.Func] = None 740 ) -> t.Optional[t.List[exp.Expression]]: 741 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 742 return self._parse_csv(self._parse_lambda) 743 744 if self._match(TokenType.L_PAREN): 745 params = self._parse_csv(self._parse_lambda) 746 self._match_r_paren(this) 747 return params 748 749 return None 750 751 def _parse_quantile(self) -> exp.Quantile: 752 this = self._parse_lambda() 753 params = self._parse_func_params() 754 if params: 755 return self.expression(exp.Quantile, this=params[0], quantile=this) 756 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 757 758 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 759 return super()._parse_wrapped_id_vars(optional=True) 760 761 def _parse_primary_key( 762 self, wrapped_optional: bool = False, in_props: bool = False 763 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 764 return super()._parse_primary_key( 765 wrapped_optional=wrapped_optional or in_props, in_props=in_props 766 ) 767 768 def _parse_on_property(self) -> t.Optional[exp.Expression]: 769 index = self._index 770 if self._match_text_seq("CLUSTER"): 771 this = self._parse_id_var() 772 if this: 773 return self.expression(exp.OnCluster, this=this) 774 else: 775 self._retreat(index) 776 return None 777 778 def _parse_index_constraint( 779 self, kind: t.Optional[str] = None 780 ) -> exp.IndexColumnConstraint: 781 # INDEX name1 expr TYPE type1(args) GRANULARITY value 782 this = self._parse_id_var() 783 expression = self._parse_assignment() 784 785 index_type = self._match_text_seq("TYPE") and ( 786 self._parse_function() or self._parse_var() 787 ) 788 789 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 790 791 return self.expression( 792 exp.IndexColumnConstraint, 793 this=this, 794 expression=expression, 795 index_type=index_type, 796 granularity=granularity, 797 ) 798 799 def _parse_partition(self) -> t.Optional[exp.Partition]: 800 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 801 if not self._match(TokenType.PARTITION): 802 return None 803 804 if self._match_text_seq("ID"): 805 # Corresponds to the PARTITION ID <string_value> syntax 806 expressions: t.List[exp.Expression] = [ 807 self.expression(exp.PartitionId, this=self._parse_string()) 808 ] 809 else: 810 expressions = self._parse_expressions() 811 812 return self.expression(exp.Partition, expressions=expressions) 813 814 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 815 partition = self._parse_partition() 816 817 if not partition or not self._match(TokenType.FROM): 818 return None 819 820 return self.expression( 821 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 822 ) 823 824 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 825 if not self._match_text_seq("PROJECTION"): 826 return None 827 828 return self.expression( 829 exp.ProjectionDef, 830 this=self._parse_id_var(), 831 expression=self._parse_wrapped(self._parse_statement), 832 ) 833 834 def _parse_constraint(self) -> t.Optional[exp.Expression]: 835 return super()._parse_constraint() or self._parse_projection_def() 836 837 def _parse_alias( 838 self, this: t.Optional[exp.Expression], explicit: bool = False 839 ) -> t.Optional[exp.Expression]: 840 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 841 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 842 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 843 return this 844 845 return super()._parse_alias(this=this, explicit=explicit) 846 847 def _parse_expression(self) -> t.Optional[exp.Expression]: 848 this = super()._parse_expression() 849 850 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 851 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 852 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 853 self._match(TokenType.R_PAREN) 854 855 return this 856 857 def _parse_columns(self) -> exp.Expression: 858 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 859 860 while self._next and self._match_text_seq(")", "APPLY", "("): 861 self._match(TokenType.R_PAREN) 862 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 863 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
865 class Generator(generator.Generator): 866 QUERY_HINTS = False 867 STRUCT_DELIMITER = ("(", ")") 868 NVL2_SUPPORTED = False 869 TABLESAMPLE_REQUIRES_PARENS = False 870 TABLESAMPLE_SIZE_IS_ROWS = False 871 TABLESAMPLE_KEYWORDS = "SAMPLE" 872 LAST_DAY_SUPPORTS_DATE_PART = False 873 CAN_IMPLEMENT_ARRAY_ANY = True 874 SUPPORTS_TO_NUMBER = False 875 JOIN_HINTS = False 876 TABLE_HINTS = False 877 GROUPINGS_SEP = "" 878 SET_OP_MODIFIERS = False 879 SUPPORTS_TABLE_ALIAS_COLUMNS = False 880 VALUES_AS_TABLE = False 881 ARRAY_SIZE_NAME = "LENGTH" 882 883 STRING_TYPE_MAPPING = { 884 exp.DataType.Type.CHAR: "String", 885 exp.DataType.Type.LONGBLOB: "String", 886 exp.DataType.Type.LONGTEXT: "String", 887 exp.DataType.Type.MEDIUMBLOB: "String", 888 exp.DataType.Type.MEDIUMTEXT: "String", 889 exp.DataType.Type.TINYBLOB: "String", 890 exp.DataType.Type.TINYTEXT: "String", 891 exp.DataType.Type.TEXT: "String", 892 exp.DataType.Type.VARBINARY: "String", 893 exp.DataType.Type.VARCHAR: "String", 894 } 895 896 SUPPORTED_JSON_PATH_PARTS = { 897 exp.JSONPathKey, 898 exp.JSONPathRoot, 899 exp.JSONPathSubscript, 900 } 901 902 TYPE_MAPPING = { 903 **generator.Generator.TYPE_MAPPING, 904 **STRING_TYPE_MAPPING, 905 exp.DataType.Type.ARRAY: "Array", 906 exp.DataType.Type.BOOLEAN: "Bool", 907 exp.DataType.Type.BIGINT: "Int64", 908 exp.DataType.Type.DATE32: "Date32", 909 exp.DataType.Type.DATETIME: "DateTime", 910 exp.DataType.Type.DATETIME2: "DateTime", 911 exp.DataType.Type.SMALLDATETIME: "DateTime", 912 exp.DataType.Type.DATETIME64: "DateTime64", 913 exp.DataType.Type.DECIMAL: "Decimal", 914 exp.DataType.Type.DECIMAL32: "Decimal32", 915 exp.DataType.Type.DECIMAL64: "Decimal64", 916 exp.DataType.Type.DECIMAL128: "Decimal128", 917 exp.DataType.Type.DECIMAL256: "Decimal256", 918 exp.DataType.Type.TIMESTAMP: "DateTime", 919 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 920 exp.DataType.Type.DOUBLE: "Float64", 921 exp.DataType.Type.ENUM: "Enum", 922 exp.DataType.Type.ENUM8: "Enum8", 923 exp.DataType.Type.ENUM16: "Enum16", 924 exp.DataType.Type.FIXEDSTRING: "FixedString", 925 exp.DataType.Type.FLOAT: "Float32", 926 exp.DataType.Type.INT: "Int32", 927 exp.DataType.Type.MEDIUMINT: "Int32", 928 exp.DataType.Type.INT128: "Int128", 929 exp.DataType.Type.INT256: "Int256", 930 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 931 exp.DataType.Type.MAP: "Map", 932 exp.DataType.Type.NESTED: "Nested", 933 exp.DataType.Type.SMALLINT: "Int16", 934 exp.DataType.Type.STRUCT: "Tuple", 935 exp.DataType.Type.TINYINT: "Int8", 936 exp.DataType.Type.UBIGINT: "UInt64", 937 exp.DataType.Type.UINT: "UInt32", 938 exp.DataType.Type.UINT128: "UInt128", 939 exp.DataType.Type.UINT256: "UInt256", 940 exp.DataType.Type.USMALLINT: "UInt16", 941 exp.DataType.Type.UTINYINT: "UInt8", 942 exp.DataType.Type.IPV4: "IPv4", 943 exp.DataType.Type.IPV6: "IPv6", 944 exp.DataType.Type.POINT: "Point", 945 exp.DataType.Type.RING: "Ring", 946 exp.DataType.Type.LINESTRING: "LineString", 947 exp.DataType.Type.MULTILINESTRING: "MultiLineString", 948 exp.DataType.Type.POLYGON: "Polygon", 949 exp.DataType.Type.MULTIPOLYGON: "MultiPolygon", 950 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 951 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 952 } 953 954 TRANSFORMS = { 955 **generator.Generator.TRANSFORMS, 956 exp.AnyValue: rename_func("any"), 957 exp.ApproxDistinct: rename_func("uniq"), 958 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 959 exp.ArraySum: rename_func("arraySum"), 960 exp.ArgMax: arg_max_or_min_no_count("argMax"), 961 exp.ArgMin: arg_max_or_min_no_count("argMin"), 962 exp.Array: inline_array_sql, 963 exp.CastToStrType: rename_func("CAST"), 964 exp.CountIf: rename_func("countIf"), 965 exp.CompressColumnConstraint: lambda self, 966 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 967 exp.ComputedColumnConstraint: lambda self, 968 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 969 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 970 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 971 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 972 exp.DateStrToDate: rename_func("toDate"), 973 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 974 exp.Explode: rename_func("arrayJoin"), 975 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 976 exp.IsNan: rename_func("isNaN"), 977 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 978 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 979 exp.JSONPathKey: json_path_key_only_name, 980 exp.JSONPathRoot: lambda *_: "", 981 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 982 exp.Median: rename_func("median"), 983 exp.Nullif: rename_func("nullIf"), 984 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 985 exp.Pivot: no_pivot_sql, 986 exp.Quantile: _quantile_sql, 987 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 988 exp.Rand: rename_func("randCanonical"), 989 exp.StartsWith: rename_func("startsWith"), 990 exp.StrPosition: lambda self, e: self.func( 991 "position", e.this, e.args.get("substr"), e.args.get("position") 992 ), 993 exp.TimeToStr: lambda self, e: self.func( 994 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 995 ), 996 exp.TimeStrToTime: _timestrtotime_sql, 997 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 998 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 999 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 1000 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 1001 exp.MD5Digest: rename_func("MD5"), 1002 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 1003 exp.SHA: rename_func("SHA1"), 1004 exp.SHA2: sha256_sql, 1005 exp.UnixToTime: _unix_to_time_sql, 1006 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 1007 exp.Trim: trim_sql, 1008 exp.Variance: rename_func("varSamp"), 1009 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 1010 exp.Stddev: rename_func("stddevSamp"), 1011 exp.Chr: rename_func("CHAR"), 1012 exp.Lag: lambda self, e: self.func( 1013 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 1014 ), 1015 exp.Lead: lambda self, e: self.func( 1016 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 1017 ), 1018 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost", "max_dist")( 1019 rename_func("editDistance") 1020 ), 1021 } 1022 1023 PROPERTIES_LOCATION = { 1024 **generator.Generator.PROPERTIES_LOCATION, 1025 exp.OnCluster: exp.Properties.Location.POST_NAME, 1026 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1027 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 1028 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1029 } 1030 1031 # There's no list in docs, but it can be found in Clickhouse code 1032 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 1033 ON_CLUSTER_TARGETS = { 1034 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 1035 "DATABASE", 1036 "TABLE", 1037 "VIEW", 1038 "DICTIONARY", 1039 "INDEX", 1040 "FUNCTION", 1041 "NAMED COLLECTION", 1042 } 1043 1044 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 1045 NON_NULLABLE_TYPES = { 1046 exp.DataType.Type.ARRAY, 1047 exp.DataType.Type.MAP, 1048 exp.DataType.Type.STRUCT, 1049 exp.DataType.Type.POINT, 1050 exp.DataType.Type.RING, 1051 exp.DataType.Type.LINESTRING, 1052 exp.DataType.Type.MULTILINESTRING, 1053 exp.DataType.Type.POLYGON, 1054 exp.DataType.Type.MULTIPOLYGON, 1055 } 1056 1057 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1058 strtodate_sql = self.function_fallback_sql(expression) 1059 1060 if not isinstance(expression.parent, exp.Cast): 1061 # StrToDate returns DATEs in other dialects (eg. postgres), so 1062 # this branch aims to improve the transpilation to clickhouse 1063 return f"CAST({strtodate_sql} AS DATE)" 1064 1065 return strtodate_sql 1066 1067 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1068 this = expression.this 1069 1070 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1071 return self.sql(this) 1072 1073 return super().cast_sql(expression, safe_prefix=safe_prefix) 1074 1075 def trycast_sql(self, expression: exp.TryCast) -> str: 1076 dtype = expression.to 1077 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1078 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1079 dtype.set("nullable", True) 1080 1081 return super().cast_sql(expression) 1082 1083 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 1084 this = self.json_path_part(expression.this) 1085 return str(int(this) + 1) if is_int(this) else this 1086 1087 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 1088 return f"AS {self.sql(expression, 'this')}" 1089 1090 def _any_to_has( 1091 self, 1092 expression: exp.EQ | exp.NEQ, 1093 default: t.Callable[[t.Any], str], 1094 prefix: str = "", 1095 ) -> str: 1096 if isinstance(expression.left, exp.Any): 1097 arr = expression.left 1098 this = expression.right 1099 elif isinstance(expression.right, exp.Any): 1100 arr = expression.right 1101 this = expression.left 1102 else: 1103 return default(expression) 1104 1105 return prefix + self.func("has", arr.this.unnest(), this) 1106 1107 def eq_sql(self, expression: exp.EQ) -> str: 1108 return self._any_to_has(expression, super().eq_sql) 1109 1110 def neq_sql(self, expression: exp.NEQ) -> str: 1111 return self._any_to_has(expression, super().neq_sql, "NOT ") 1112 1113 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1114 # Manually add a flag to make the search case-insensitive 1115 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1116 return self.func("match", expression.this, regex) 1117 1118 def datatype_sql(self, expression: exp.DataType) -> str: 1119 # String is the standard ClickHouse type, every other variant is just an alias. 1120 # Additionally, any supplied length parameter will be ignored. 1121 # 1122 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1123 if expression.this in self.STRING_TYPE_MAPPING: 1124 dtype = "String" 1125 else: 1126 dtype = super().datatype_sql(expression) 1127 1128 # This section changes the type to `Nullable(...)` if the following conditions hold: 1129 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1130 # and change their semantics 1131 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1132 # constraint: "Type of Map key must be a type, that can be represented by integer or 1133 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1134 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1135 parent = expression.parent 1136 nullable = expression.args.get("nullable") 1137 if nullable is True or ( 1138 nullable is None 1139 and not ( 1140 isinstance(parent, exp.DataType) 1141 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1142 and expression.index in (None, 0) 1143 ) 1144 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1145 ): 1146 dtype = f"Nullable({dtype})" 1147 1148 return dtype 1149 1150 def cte_sql(self, expression: exp.CTE) -> str: 1151 if expression.args.get("scalar"): 1152 this = self.sql(expression, "this") 1153 alias = self.sql(expression, "alias") 1154 return f"{this} AS {alias}" 1155 1156 return super().cte_sql(expression) 1157 1158 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1159 return super().after_limit_modifiers(expression) + [ 1160 ( 1161 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1162 if expression.args.get("settings") 1163 else "" 1164 ), 1165 ( 1166 self.seg("FORMAT ") + self.sql(expression, "format") 1167 if expression.args.get("format") 1168 else "" 1169 ), 1170 ] 1171 1172 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1173 params = self.expressions(expression, key="params", flat=True) 1174 return self.func(expression.name, *expression.expressions) + f"({params})" 1175 1176 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1177 return self.func(expression.name, *expression.expressions) 1178 1179 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1180 return self.anonymousaggfunc_sql(expression) 1181 1182 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1183 return self.parameterizedagg_sql(expression) 1184 1185 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1186 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1187 1188 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1189 return f"ON CLUSTER {self.sql(expression, 'this')}" 1190 1191 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1192 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1193 exp.Properties.Location.POST_NAME 1194 ): 1195 this_name = self.sql( 1196 expression.this if isinstance(expression.this, exp.Schema) else expression, 1197 "this", 1198 ) 1199 this_properties = " ".join( 1200 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1201 ) 1202 this_schema = self.schema_columns_sql(expression.this) 1203 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1204 1205 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1206 1207 return super().createable_sql(expression, locations) 1208 1209 def create_sql(self, expression: exp.Create) -> str: 1210 # The comment property comes last in CTAS statements, i.e. after the query 1211 query = expression.expression 1212 if isinstance(query, exp.Query): 1213 comment_prop = expression.find(exp.SchemaCommentProperty) 1214 if comment_prop: 1215 comment_prop.pop() 1216 query.replace(exp.paren(query)) 1217 else: 1218 comment_prop = None 1219 1220 create_sql = super().create_sql(expression) 1221 1222 comment_sql = self.sql(comment_prop) 1223 comment_sql = f" {comment_sql}" if comment_sql else "" 1224 1225 return f"{create_sql}{comment_sql}" 1226 1227 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1228 this = self.indent(self.sql(expression, "this")) 1229 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1230 1231 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1232 this = self.sql(expression, "this") 1233 this = f" {this}" if this else "" 1234 expr = self.sql(expression, "expression") 1235 expr = f" {expr}" if expr else "" 1236 index_type = self.sql(expression, "index_type") 1237 index_type = f" TYPE {index_type}" if index_type else "" 1238 granularity = self.sql(expression, "granularity") 1239 granularity = f" GRANULARITY {granularity}" if granularity else "" 1240 1241 return f"INDEX{this}{expr}{index_type}{granularity}" 1242 1243 def partition_sql(self, expression: exp.Partition) -> str: 1244 return f"PARTITION {self.expressions(expression, flat=True)}" 1245 1246 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1247 return f"ID {self.sql(expression.this)}" 1248 1249 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1250 return ( 1251 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1252 ) 1253 1254 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1255 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}" 1256 1257 def is_sql(self, expression: exp.Is) -> str: 1258 is_sql = super().is_sql(expression) 1259 1260 if isinstance(expression.parent, exp.Not): 1261 # value IS NOT NULL -> NOT (value IS NULL) 1262 is_sql = self.wrap(is_sql) 1263 1264 return is_sql
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1057 def strtodate_sql(self, expression: exp.StrToDate) -> str: 1058 strtodate_sql = self.function_fallback_sql(expression) 1059 1060 if not isinstance(expression.parent, exp.Cast): 1061 # StrToDate returns DATEs in other dialects (eg. postgres), so 1062 # this branch aims to improve the transpilation to clickhouse 1063 return f"CAST({strtodate_sql} AS DATE)" 1064 1065 return strtodate_sql
1067 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1068 this = expression.this 1069 1070 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 1071 return self.sql(this) 1072 1073 return super().cast_sql(expression, safe_prefix=safe_prefix)
1075 def trycast_sql(self, expression: exp.TryCast) -> str: 1076 dtype = expression.to 1077 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 1078 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 1079 dtype.set("nullable", True) 1080 1081 return super().cast_sql(expression)
1118 def datatype_sql(self, expression: exp.DataType) -> str: 1119 # String is the standard ClickHouse type, every other variant is just an alias. 1120 # Additionally, any supplied length parameter will be ignored. 1121 # 1122 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1123 if expression.this in self.STRING_TYPE_MAPPING: 1124 dtype = "String" 1125 else: 1126 dtype = super().datatype_sql(expression) 1127 1128 # This section changes the type to `Nullable(...)` if the following conditions hold: 1129 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1130 # and change their semantics 1131 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1132 # constraint: "Type of Map key must be a type, that can be represented by integer or 1133 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1134 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1135 parent = expression.parent 1136 nullable = expression.args.get("nullable") 1137 if nullable is True or ( 1138 nullable is None 1139 and not ( 1140 isinstance(parent, exp.DataType) 1141 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1142 and expression.index in (None, 0) 1143 ) 1144 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1145 ): 1146 dtype = f"Nullable({dtype})" 1147 1148 return dtype
1158 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1159 return super().after_limit_modifiers(expression) + [ 1160 ( 1161 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1162 if expression.args.get("settings") 1163 else "" 1164 ), 1165 ( 1166 self.seg("FORMAT ") + self.sql(expression, "format") 1167 if expression.args.get("format") 1168 else "" 1169 ), 1170 ]
1191 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1192 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1193 exp.Properties.Location.POST_NAME 1194 ): 1195 this_name = self.sql( 1196 expression.this if isinstance(expression.this, exp.Schema) else expression, 1197 "this", 1198 ) 1199 this_properties = " ".join( 1200 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1201 ) 1202 this_schema = self.schema_columns_sql(expression.this) 1203 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1204 1205 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1206 1207 return super().createable_sql(expression, locations)
1209 def create_sql(self, expression: exp.Create) -> str: 1210 # The comment property comes last in CTAS statements, i.e. after the query 1211 query = expression.expression 1212 if isinstance(query, exp.Query): 1213 comment_prop = expression.find(exp.SchemaCommentProperty) 1214 if comment_prop: 1215 comment_prop.pop() 1216 query.replace(exp.paren(query)) 1217 else: 1218 comment_prop = None 1219 1220 create_sql = super().create_sql(expression) 1221 1222 comment_sql = self.sql(comment_prop) 1223 comment_sql = f" {comment_sql}" if comment_sql else "" 1224 1225 return f"{create_sql}{comment_sql}"
1231 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1232 this = self.sql(expression, "this") 1233 this = f" {this}" if this else "" 1234 expr = self.sql(expression, "expression") 1235 expr = f" {expr}" if expr else "" 1236 index_type = self.sql(expression, "index_type") 1237 index_type = f" TYPE {index_type}" if index_type else "" 1238 granularity = self.sql(expression, "granularity") 1239 granularity = f" GRANULARITY {granularity}" if granularity else "" 1240 1241 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- SUPPORTS_UNIX_SECONDS
- PARSE_JSON_NAME
- ARRAY_SIZE_DIM_REQUIRED
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql