sqlglot.dialects.hive
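This module defines the Hive dialect: the tokenizer, parser, and generator tweaks sqlglot needs to read and write HiveQL (and, by extension, much of Spark SQL). A minimal usage sketch, assuming only that sqlglot is installed and that the dialect name "hive" is registered (both hold for stock sqlglot); the exact rendered SQL can vary between sqlglot versions:

import sqlglot

# Read DuckDB-flavored SQL and render it as HiveQL.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="hive")[0])

# Read HiveQL and render it for another engine.
print(sqlglot.transpile("SELECT GET_JSON_OBJECT(payload, '$.id') FROM logs", read="hive", write="spark")[0])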
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    create_with_partitions_sql,
    format_time_lambda,
    if_sql,
    is_parse_json,
    left_to_substring_sql,
    locate_to_strposition,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    no_recursive_cte_sql,
    no_safe_divide_sql,
    no_trycast_sql,
    regexp_extract_sql,
    regexp_replace_sql,
    rename_func,
    right_to_substring_sql,
    strposition_to_locate_sql,
    struct_extract_sql,
    time_format,
    timestrtotime_sql,
    var_map_sql,
)
from sqlglot.helper import seq_get
from sqlglot.parser import parse_var_map
from sqlglot.tokens import TokenType

# (FuncType, Multiplier)
DATE_DELTA_INTERVAL = {
    "YEAR": ("ADD_MONTHS", 12),
    "MONTH": ("ADD_MONTHS", 1),
    "QUARTER": ("ADD_MONTHS", 3),
    "WEEK": ("DATE_ADD", 7),
    "DAY": ("DATE_ADD", 1),
}

TIME_DIFF_FACTOR = {
    "MILLISECOND": " * 1000",
    "SECOND": "",
    "MINUTE": " / 60",
    "HOUR": " / 3600",
}

DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")


def _create_sql(self, expression: exp.Create) -> str:
    # remove UNIQUE column constraints
    for constraint in expression.find_all(exp.UniqueColumnConstraint):
        if constraint.parent:
            constraint.parent.pop()

    properties = expression.args.get("properties")
    temporary = any(
        isinstance(prop, exp.TemporaryProperty)
        for prop in (properties.expressions if properties else [])
    )

    # CTAS with temp tables map to CREATE TEMPORARY VIEW
    kind = expression.args["kind"]
    if kind.upper() == "TABLE" and temporary:
        if expression.expression:
            return f"CREATE TEMPORARY VIEW {self.sql(expression, 'this')} AS {self.sql(expression, 'expression')}"
        else:
            # CREATE TEMPORARY TABLE may require storage provider
            expression = self.temporary_storage_provider(expression)

    return create_with_partitions_sql(self, expression)


def _add_date_sql(self: Hive.Generator, expression: exp.DateAdd | exp.DateSub) -> str:
    unit = expression.text("unit").upper()
    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))

    if isinstance(expression, exp.DateSub):
        multiplier *= -1

    if expression.expression.is_number:
        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
    else:
        modified_increment = expression.expression
        if multiplier != 1:
            modified_increment = exp.Mul(  # type: ignore
                this=modified_increment, expression=exp.Literal.number(multiplier)
            )

    return self.func(func, expression.this, modified_increment)


def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff) -> str:
    unit = expression.text("unit").upper()

    factor = TIME_DIFF_FACTOR.get(unit)
    if factor is not None:
        left = self.sql(expression, "this")
        right = self.sql(expression, "expression")
        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
        return f"({sec_diff}){factor}" if factor else sec_diff

    months_between = unit in DIFF_MONTH_SWITCH
    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"

    if months_between:
        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part
        diff_sql = f"CAST({diff_sql} AS INT)"

    return f"{diff_sql}{multiplier_sql}"


def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
    this = expression.this

    if is_parse_json(this):
        if this.this.is_string:
            # Since FROM_JSON requires a nested type, we always wrap the json string with
            # an array to ensure that "naked" strings like "'a'" will be handled correctly
            wrapped_json = exp.Literal.string(f"[{this.this.name}]")

            from_json = self.func(
                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
            )
            to_json = self.func("TO_JSON", from_json)

            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
        return self.sql(this)

    return self.func("TO_JSON", this, expression.args.get("options"))


def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
    if expression.expression:
        self.unsupported("Hive SORT_ARRAY does not support a comparator")
    return f"SORT_ARRAY({self.sql(expression, 'this')})"


def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"


def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))


def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS DATE)"


def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
    return f"CAST({this} AS TIMESTAMP)"


def _time_to_str(self: Hive.Generator, expression: exp.TimeToStr) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    return f"DATE_FORMAT({this}, {time_format})"


def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
    this = self.sql(expression, "this")
    time_format = self.format_time(expression)
    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
        return f"TO_DATE({this}, {time_format})"
    return f"TO_DATE({this})"


class Hive(Dialect):
    ALIAS_POST_TABLESAMPLE = True
    IDENTIFIERS_CAN_START_WITH_DIGIT = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True

    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
    RESOLVES_IDENTIFIERS_AS_UPPERCASE = None

    TIME_MAPPING = {
        "y": "%Y",
        "Y": "%Y",
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "MMM": "%b",
        "MM": "%m",
        "M": "%-m",
        "dd": "%d",
        "d": "%-d",
        "HH": "%H",
        "H": "%-H",
        "hh": "%I",
        "h": "%-I",
        "mm": "%M",
        "m": "%-M",
        "ss": "%S",
        "s": "%-S",
        "SSSSSS": "%f",
        "a": "%p",
        "DD": "%j",
        "D": "%-j",
        "E": "%a",
        "EE": "%a",
        "EEE": "%a",
        "EEEE": "%A",
    }

    DATE_FORMAT = "'yyyy-MM-dd'"
    DATEINT_FORMAT = "'yyyyMMdd'"
    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"']
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]
        ENCODE = "utf-8"

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ADD ARCHIVE": TokenType.COMMAND,
            "ADD ARCHIVES": TokenType.COMMAND,
            "ADD FILE": TokenType.COMMAND,
            "ADD FILES": TokenType.COMMAND,
            "ADD JAR": TokenType.COMMAND,
            "ADD JARS": TokenType.COMMAND,
            "MSCK REPAIR": TokenType.COMMAND,
            "REFRESH": TokenType.REFRESH,
            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
        }

        NUMERIC_LITERALS = {
            "L": "BIGINT",
            "S": "SMALLINT",
            "Y": "TINYINT",
            "D": "DOUBLE",
            "F": "FLOAT",
            "BD": "DECIMAL",
        }

    class Parser(parser.Parser):
        LOG_DEFAULTS_TO_LN = True
        STRICT_CAST = False

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "BASE64": exp.ToBase64.from_arg_list,
            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
            "COLLECT_SET": exp.SetAgg.from_arg_list,
            "DATE_ADD": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
            ),
            "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
                [
                    exp.TimeStrToTime(this=seq_get(args, 0)),
                    seq_get(args, 1),
                ]
            ),
            "DATE_SUB": lambda args: exp.TsOrDsAdd(
                this=seq_get(args, 0),
                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
                unit=exp.Literal.string("DAY"),
            ),
            "DATEDIFF": lambda args: exp.DateDiff(
                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
            ),
            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
            "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
            "LOCATE": locate_to_strposition,
            "MAP": parse_var_map,
            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
            "PERCENTILE": exp.Quantile.from_arg_list,
            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
            ),
            "SIZE": exp.ArraySize.from_arg_list,
            "SPLIT": exp.RegexpSplit.from_arg_list,
            "STR_TO_MAP": lambda args: exp.StrToMap(
                this=seq_get(args, 0),
                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
            ),
            "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
            "TO_JSON": exp.JSONFormat.from_arg_list,
            "UNBASE64": exp.FromBase64.from_arg_list,
            "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
        }

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "TRANSFORM": lambda self: self._parse_transform(),
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
                expressions=self._parse_wrapped_csv(self._parse_property)
            ),
        }

        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
            if not self._match(TokenType.L_PAREN, advance=False):
                self._retreat(self._index - 1)
                return None

            args = self._parse_wrapped_csv(self._parse_lambda)
            row_format_before = self._parse_row_format(match_row=True)

            record_writer = None
            if self._match_text_seq("RECORDWRITER"):
                record_writer = self._parse_string()

            if not self._match(TokenType.USING):
                return exp.Transform.from_arg_list(args)

            command_script = self._parse_string()

            self._match(TokenType.ALIAS)
            schema = self._parse_schema()

            row_format_after = self._parse_row_format(match_row=True)
            record_reader = None
            if self._match_text_seq("RECORDREADER"):
                record_reader = self._parse_string()

            return self.expression(
                exp.QueryTransform,
                expressions=args,
                command_script=command_script,
                schema=schema,
                row_format_before=row_format_before,
                record_writer=record_writer,
                row_format_after=row_format_after,
                record_reader=record_reader,
            )

        def _parse_types(
            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
        ) -> t.Optional[exp.Expression]:
            """
            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

            spark-sql (default)> select cast(1234 as varchar(2));
            23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
            char/varchar type and simply treats them as string type. Please use string type
            directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
            to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

            1234
            Time taken: 4.265 seconds, Fetched 1 row(s)

            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
            """
            this = super()._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )

            if this and not schema:
                return this.transform(
                    lambda node: node.replace(exp.DataType.build("text"))
                    if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                    else node,
                    copy=False,
                )

            return this

        def _parse_partition_and_order(
            self,
        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
            return (
                self._parse_csv(self._parse_conjunction)
                if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
                else [],
                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
            )

    class Generator(generator.Generator):
        LIMIT_FETCH = "LIMIT"
        TABLESAMPLE_WITH_METHOD = False
        TABLESAMPLE_SIZE_IS_PERCENT = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        INDEX_ON = "ON TABLE"
        EXTRACT_ALLOWS_QUOTES = False
        NVL2_SUPPORTED = False

        EXPRESSIONS_WITHOUT_NESTED_CTES = {
            exp.Insert,
            exp.Select,
            exp.Subquery,
            exp.Union,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIT: "BOOLEAN",
            exp.DataType.Type.DATETIME: "TIMESTAMP",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIME: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.VARBINARY: "BINARY",
        }

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.Group: transforms.preprocess([transforms.unalias_group]),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_qualify,
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.Property: _property_sql,
            exp.AnyValue: rename_func("FIRST"),
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.ArrayConcat: rename_func("CONCAT"),
            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
            exp.ArraySize: rename_func("SIZE"),
            exp.ArraySort: _array_sort_sql,
            exp.With: no_recursive_cte_sql,
            exp.DateAdd: _add_date_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: rename_func("TO_DATE"),
            exp.DateSub: _add_date_sql,
            exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
            exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
            exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
            exp.FromBase64: rename_func("UNBASE64"),
            exp.If: if_sql(),
            exp.ILike: no_ilike_sql,
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
            exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
            exp.JSONFormat: _json_format_sql,
            exp.Left: left_to_substring_sql,
            exp.Map: var_map_sql,
            exp.Max: max_or_greatest,
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.Min: min_or_least,
            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
            exp.NotNullColumnConstraint: lambda self, e: ""
            if e.args.get("allow_null")
            else "NOT NULL",
            exp.VarMap: var_map_sql,
            exp.Create: _create_sql,
            exp.Quantile: rename_func("PERCENTILE"),
            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
            exp.RegexpSplit: rename_func("SPLIT"),
            exp.Right: right_to_substring_sql,
            exp.SafeDivide: no_safe_divide_sql,
            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
            exp.SetAgg: rename_func("COLLECT_SET"),
            exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
            exp.StrPosition: strposition_to_locate_sql,
            exp.StrToDate: _str_to_date_sql,
            exp.StrToTime: _str_to_time_sql,
            exp.StrToUnix: _str_to_unix_sql,
            exp.StructExtract: struct_extract_sql,
            exp.TimeStrToDate: rename_func("TO_DATE"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.TimeToStr: _time_to_str,
            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
            exp.ToBase64: rename_func("BASE64"),
            exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
            exp.TsOrDsToDate: _to_date_sql,
            exp.TryCast: no_trycast_sql,
            exp.UnixToStr: lambda self, e: self.func(
                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
            ),
            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
            exp.LastDateOfMonth: rename_func("LAST_DAY"),
            exp.National: lambda self, e: self.national_sql(e, prefix=""),
            exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
            exp.NotForReplicationColumnConstraint: lambda self, e: "",
            exp.OnProperty: lambda self, e: "",
            exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
        }

        def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
            # Hive has no temporary storage provider (there are hive settings though)
            return expression

        def parameter_sql(self, expression: exp.Parameter) -> str:
            this = self.sql(expression, "this")
            expression_sql = self.sql(expression, "expression")

            parent = expression.parent
            this = f"{this}:{expression_sql}" if expression_sql else this

            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
                # We need to produce SET key = value instead of SET ${key} = value
                return this

            return f"${{{this}}}"

        def schema_sql(self, expression: exp.Schema) -> str:
            for ordered in expression.find_all(exp.Ordered):
                if ordered.args.get("desc") is False:
                    ordered.set("desc", None)

            return super().schema_sql(expression)

        def constraint_sql(self, expression: exp.Constraint) -> str:
            for prop in list(expression.find_all(exp.Properties)):
                prop.pop()

            this = self.sql(expression, "this")
            expressions = self.expressions(expression, sep=" ", flat=True)
            return f"CONSTRAINT {this} {expressions}"

        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
            serde_props = self.sql(expression, "serde_properties")
            serde_props = f" {serde_props}" if serde_props else ""
            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
            return self.func(
                "COLLECT_LIST",
                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
            )

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

        def datatype_sql(self, expression: exp.DataType) -> str:
            if (
                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
                and not expression.expressions
            ):
                expression = exp.DataType.build("text")
            elif expression.this in exp.DataType.TEMPORAL_TYPES:
                expression = exp.DataType.build(expression.this)
            elif expression.is_type("float"):
                size_expression = expression.find(exp.DataTypeParam)
                if size_expression:
                    size = int(size_expression.name)
                    expression = (
                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                    )

            return super().datatype_sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            sql = super().version_sql(expression)
            return sql.replace("FOR ", "", 1)
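The _parse_types docstring above explains why CHAR(length) and VARCHAR(length) casts lose their length outside of schema definitions. A small illustration of that behavior; the exact output text depends on the sqlglot version and the write dialect:

import sqlglot

# Parsed with the Hive dialect, VARCHAR(2) is rewritten to a plain string type
# (see Hive.Parser._parse_types), so the length is not carried into the output SQL.
print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])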
DATE_DELTA_INTERVAL =
{'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR =
{'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH =
('YEAR', 'QUARTER', 'MONTH')
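These module-level tables drive _add_date_sql and _date_diff_sql: units without a native Hive function are rewritten to ADD_MONTHS/DATE_ADD with a multiplier, and sub-day DATEDIFFs become UNIX_TIMESTAMP arithmetic. A hedged sketch (the source dialects chosen here and the exact output may differ in your sqlglot version):

import sqlglot

# WEEK is not a native Hive DATE_ADD unit; DATE_DELTA_INTERVAL maps it to ("DATE_ADD", 7),
# so a 2-week delta is expected to come out as a day-based DATE_ADD (e.g. DATE_ADD(dt, 14)).
print(sqlglot.transpile("SELECT DATE_ADD(dt, INTERVAL 2 WEEK)", read="mysql", write="hive")[0])

# MINUTE is handled by TIME_DIFF_FACTOR, producing a UNIX_TIMESTAMP subtraction divided by 60.
print(sqlglot.transpile("SELECT DATEDIFF(MINUTE, start_ts, end_ts)", read="tsql", write="hive")[0])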
class Hive(sqlglot.dialects.dialect.Dialect):
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
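TIME_MAPPING converts Hive/Spark format tokens (Java SimpleDateFormat style: yyyy, MM, dd, ...) to sqlglot's internal strftime-style tokens and back, which is what lets format strings survive transpilation. A hedged example; the function used on the target side depends on the write dialect and version:

import sqlglot

# 'yyyy-MM-dd' is mapped through TIME_MAPPING to '%Y-%m-%d' internally, then re-rendered
# for the target dialect (DuckDB is expected to use a STRFTIME-style call here).
print(sqlglot.transpile("SELECT DATE_FORMAT(ts, 'yyyy-MM-dd')", read="hive", write="duckdb")[0])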
tokenizer_class =
<class 'Hive.Tokenizer'>
parser_class =
<class 'Hive.Parser'>
generator_class =
<class 'Hive.Generator'>
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- NULL_ORDERING
- TYPED_DIVISION
- FORMAT_MAPPING
- ESCAPE_SEQUENCES
- PSEUDOCOLUMNS
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
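The inherited Dialect helpers listed above (tokenize, parse, generate, transpile, and friends) can be called on a Hive dialect instance directly; a minimal sketch:

from sqlglot.dialects.hive import Hive

dialect = Hive()
expressions = dialect.parse("SELECT `col` FROM db.tbl")  # -> list of parsed expressions
print(dialect.generate(expressions[0]))                  # render back to HiveQL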
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
class Parser(parser.Parser):
    LOG_DEFAULTS_TO_LN = True
    STRICT_CAST = False

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "BASE64": exp.ToBase64.from_arg_list,
        "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
        "COLLECT_SET": exp.SetAgg.from_arg_list,
        "DATE_ADD": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
        ),
        "DATE_FORMAT": lambda args: format_time_lambda(exp.TimeToStr, "hive")(
            [
                exp.TimeStrToTime(this=seq_get(args, 0)),
                seq_get(args, 1),
            ]
        ),
        "DATE_SUB": lambda args: exp.TsOrDsAdd(
            this=seq_get(args, 0),
            expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
            unit=exp.Literal.string("DAY"),
        ),
        "DATEDIFF": lambda args: exp.DateDiff(
            this=exp.TsOrDsToDate(this=seq_get(args, 0)),
            expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
        ),
        "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
        "FROM_UNIXTIME": format_time_lambda(exp.UnixToStr, "hive", True),
        "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
        "LOCATE": locate_to_strposition,
        "MAP": parse_var_map,
        "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
        "PERCENTILE": exp.Quantile.from_arg_list,
        "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
        "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
            this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
        ),
        "SIZE": exp.ArraySize.from_arg_list,
        "SPLIT": exp.RegexpSplit.from_arg_list,
        "STR_TO_MAP": lambda args: exp.StrToMap(
            this=seq_get(args, 0),
            pair_delim=seq_get(args, 1) or exp.Literal.string(","),
            key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
        ),
        "TO_DATE": format_time_lambda(exp.TsOrDsToDate, "hive"),
        "TO_JSON": exp.JSONFormat.from_arg_list,
        "UNBASE64": exp.FromBase64.from_arg_list,
        "UNIX_TIMESTAMP": format_time_lambda(exp.StrToUnix, "hive", True),
        "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
    }

    NO_PAREN_FUNCTION_PARSERS = {
        **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
        "TRANSFORM": lambda self: self._parse_transform(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
            expressions=self._parse_wrapped_csv(self._parse_property)
        ),
    }

    def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
        if not self._match(TokenType.L_PAREN, advance=False):
            self._retreat(self._index - 1)
            return None

        args = self._parse_wrapped_csv(self._parse_lambda)
        row_format_before = self._parse_row_format(match_row=True)

        record_writer = None
        if self._match_text_seq("RECORDWRITER"):
            record_writer = self._parse_string()

        if not self._match(TokenType.USING):
            return exp.Transform.from_arg_list(args)

        command_script = self._parse_string()

        self._match(TokenType.ALIAS)
        schema = self._parse_schema()

        row_format_after = self._parse_row_format(match_row=True)
        record_reader = None
        if self._match_text_seq("RECORDREADER"):
            record_reader = self._parse_string()

        return self.expression(
            exp.QueryTransform,
            expressions=args,
            command_script=command_script,
            schema=schema,
            row_format_before=row_format_before,
            record_writer=record_writer,
            row_format_after=row_format_after,
            record_reader=record_reader,
        )

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """
        Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
        STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:

        spark-sql (default)> select cast(1234 as varchar(2));
        23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
        char/varchar type and simply treats them as string type. Please use string type
        directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
        to true, so that Spark treat them as string type as same as Spark 3.0 and earlier

        1234
        Time taken: 4.265 seconds, Fetched 1 row(s)

        This shows that Spark doesn't truncate the value into '12', which is inconsistent with
        what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.

        Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
        """
        this = super()._parse_types(
            check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
        )

        if this and not schema:
            return this.transform(
                lambda node: node.replace(exp.DataType.build("text"))
                if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
                else node,
                copy=False,
            )

        return this

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return (
            self._parse_csv(self._parse_conjunction)
            if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
            else [],
            super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
        )
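A minimal, hedged sketch of the CHAR/VARCHAR behaviour described in the _parse_types docstring above, using only the public sqlglot.parse_one and Expression.sql entry points; the exact rendered output may differ slightly between sqlglot versions:

import sqlglot

# Parsing with read="hive" routes through Hive.Parser._parse_types, so the length
# of a CHAR/VARCHAR cast is dropped outside of schema definitions.
ast = sqlglot.parse_one("SELECT CAST(1234 AS VARCHAR(2))", read="hive")

# Regenerating for Hive should render the cast as STRING rather than VARCHAR(2).
print(ast.sql(dialect="hive"))  # e.g. SELECT CAST(1234 AS STRING)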
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
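A hedged usage sketch: the Hive parser is normally reached through the top-level API rather than instantiated directly, and keyword options such as error_level and max_errors (documented above) should be forwarded to it by sqlglot.parse_one; behaviour may vary across sqlglot versions:

import sqlglot
from sqlglot.errors import ErrorLevel

expression = sqlglot.parse_one(
    "SELECT COLLECT_SET(col) FROM db.tbl",
    read="hive",
    error_level=ErrorLevel.RAISE,  # raise instead of failing immediately
)
print(repr(expression))  # inspect the parsed syntax tree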
FUNCTIONS =
{'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Concat'>>, 'CONCAT_WS': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.ConcatWs'>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.First'>>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtract'>>, 'JSON_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Last'>>, 'LAST_DATE_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDateOfMonth'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log'>>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound 
method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function parse_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeConcat'>>, 
'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SET_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list 
of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'VAR_MAP': <function parse_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'LIKE': <function parse_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SetAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function format_time_lambda.<locals>._format_time>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function format_time_lambda.<locals>._format_time>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function format_time_lambda.<locals>._format_time>}
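A hedged illustration of the FUNCTIONS overrides listed above: Hive-specific function names are parsed into sqlglot's canonical expressions, which other dialects can then render with their own spelling. The outputs shown in comments are indicative only:

import sqlglot

print(sqlglot.transpile("SELECT GET_JSON_OBJECT(js, '$.name') FROM t", read="hive", write="presto")[0])
# -> something like: SELECT JSON_EXTRACT_SCALAR(js, '$.name') FROM t

print(sqlglot.transpile("SELECT PERCENTILE(x, 0.5) FROM t", read="hive", write="duckdb")[0])
# -> something like: SELECT QUANTILE(x, 0.5) FROM t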
NO_PAREN_FUNCTION_PARSERS =
{'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS =
{'ALGORITHM': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SET_TRIE: Dict =
{'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
FORMAT_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
TIME_MAPPING: Dict[str, str] =
{'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}
TIME_TRIE: Dict =
{'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
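A hedged sketch of TIME_MAPPING in action: Hive/Java-style format tokens ('yyyy', 'MM', 'dd', ...) are translated to strftime-style tokens when an expression is re-rendered for a dialect that uses them. The output in the comment is indicative and may vary by sqlglot version:

import sqlglot

sql = "SELECT DATE_FORMAT(ds, 'yyyy-MM-dd HH:mm:ss') FROM t"
print(sqlglot.transpile(sql, read="hive", write="duckdb")[0])
# -> something like: SELECT STRFTIME(CAST(ds AS TIMESTAMP), '%Y-%m-%d %H:%M:%S') FROM t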
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_KEYWORDS
- DB_CREATABLES
- CREATABLES
- ID_VAR_TOKENS
- INTERVAL_VARS
- TABLE_ALIAS_TOKENS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- FUNCTION_PARSERS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- MODIFIABLES
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- CLONE_KINDS
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- CONCAT_NULL_OUTPUTS_STRING
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_BASE_FIRST
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- TYPED_DIVISION
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- FORMAT_MAPPING
- error_level
- error_message_context
- max_errors
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
class Generator(generator.Generator):
    LIMIT_FETCH = "LIMIT"
    TABLESAMPLE_WITH_METHOD = False
    TABLESAMPLE_SIZE_IS_PERCENT = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    INDEX_ON = "ON TABLE"
    EXTRACT_ALLOWS_QUOTES = False
    NVL2_SUPPORTED = False

    EXPRESSIONS_WITHOUT_NESTED_CTES = {
        exp.Insert,
        exp.Select,
        exp.Subquery,
        exp.Union,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIT: "BOOLEAN",
        exp.DataType.Type.DATETIME: "TIMESTAMP",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIME: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.VARBINARY: "BINARY",
    }

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.Group: transforms.preprocess([transforms.unalias_group]),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_qualify,
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.Property: _property_sql,
        exp.AnyValue: rename_func("FIRST"),
        exp.ApproxDistinct: approx_count_distinct_sql,
        exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
        exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
        exp.ArrayConcat: rename_func("CONCAT"),
        exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
        exp.ArraySize: rename_func("SIZE"),
        exp.ArraySort: _array_sort_sql,
        exp.With: no_recursive_cte_sql,
        exp.DateAdd: _add_date_sql,
        exp.DateDiff: _date_diff_sql,
        exp.DateStrToDate: rename_func("TO_DATE"),
        exp.DateSub: _add_date_sql,
        exp.DateToDi: lambda self, e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
        exp.DiToDate: lambda self, e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
        exp.FileFormatProperty: lambda self, e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
        exp.FromBase64: rename_func("UNBASE64"),
        exp.If: if_sql(),
        exp.ILike: no_ilike_sql,
        exp.IsNan: rename_func("ISNAN"),
        exp.JSONExtract: rename_func("GET_JSON_OBJECT"),
        exp.JSONExtractScalar: rename_func("GET_JSON_OBJECT"),
        exp.JSONFormat: _json_format_sql,
        exp.Left: left_to_substring_sql,
        exp.Map: var_map_sql,
        exp.Max: max_or_greatest,
        exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
        exp.Min: min_or_least,
        exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
        exp.NotNullColumnConstraint: lambda self, e: ""
        if e.args.get("allow_null")
        else "NOT NULL",
        exp.VarMap: var_map_sql,
        exp.Create: _create_sql,
        exp.Quantile: rename_func("PERCENTILE"),
        exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
        exp.RegexpExtract: regexp_extract_sql,
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
        exp.RegexpSplit: rename_func("SPLIT"),
        exp.Right: right_to_substring_sql,
        exp.SafeDivide: no_safe_divide_sql,
        exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
        exp.SetAgg: rename_func("COLLECT_SET"),
        exp.Split: lambda self, e: f"SPLIT({self.sql(e, 'this')}, CONCAT('\\\\Q', {self.sql(e, 'expression')}))",
        exp.StrPosition: strposition_to_locate_sql,
        exp.StrToDate: _str_to_date_sql,
        exp.StrToTime: _str_to_time_sql,
        exp.StrToUnix: _str_to_unix_sql,
        exp.StructExtract: struct_extract_sql,
        exp.TimeStrToDate: rename_func("TO_DATE"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.TimeToStr: _time_to_str,
        exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
        exp.ToBase64: rename_func("BASE64"),
        exp.TsOrDiToDi: lambda self, e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
        exp.TsOrDsAdd: lambda self, e: f"DATE_ADD({self.sql(e, 'this')}, {self.sql(e, 'expression')})",
        exp.TsOrDsToDate: _to_date_sql,
        exp.TryCast: no_trycast_sql,
        exp.UnixToStr: lambda self, e: self.func(
            "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
        ),
        exp.UnixToTime: rename_func("FROM_UNIXTIME"),
        exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
        exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
        exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
        exp.NumberToStr: rename_func("FORMAT_NUMBER"),
        exp.LastDateOfMonth: rename_func("LAST_DAY"),
        exp.National: lambda self, e: self.national_sql(e, prefix=""),
        exp.ClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NonClusteredColumnConstraint: lambda self, e: f"({self.expressions(e, 'this', indent=False)})",
        exp.NotForReplicationColumnConstraint: lambda self, e: "",
        exp.OnProperty: lambda self, e: "",
        exp.PrimaryKeyColumnConstraint: lambda self, e: "PRIMARY KEY",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
    }

    def temporary_storage_provider(self, expression: exp.Create) -> exp.Create:
        # Hive has no temporary storage provider (there are hive settings though)
        return expression

    def parameter_sql(self, expression: exp.Parameter) -> str:
        this = self.sql(expression, "this")
        expression_sql = self.sql(expression, "expression")

        parent = expression.parent
        this = f"{this}:{expression_sql}" if expression_sql else this

        if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
            # We need to produce SET key = value instead of SET ${key} = value
            return this

        return f"${{{this}}}"

    def schema_sql(self, expression: exp.Schema) -> str:
        for ordered in expression.find_all(exp.Ordered):
            if ordered.args.get("desc") is False:
                ordered.set("desc", None)

        return super().schema_sql(expression)

    def constraint_sql(self, expression: exp.Constraint) -> str:
        for prop in list(expression.find_all(exp.Properties)):
            prop.pop()

        this = self.sql(expression, "this")
        expressions = self.expressions(expression, sep=" ", flat=True)
        return f"CONSTRAINT {this} {expressions}"

    def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
        serde_props = self.sql(expression, "serde_properties")
        serde_props = f" {serde_props}" if serde_props else ""
        return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        return self.func(
            "COLLECT_LIST",
            expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
        )

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))

    def datatype_sql(self, expression: exp.DataType) -> str:
        if (
            expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
            and not expression.expressions
        ):
            expression = exp.DataType.build("text")
        elif expression.this in exp.DataType.TEMPORAL_TYPES:
            expression = exp.DataType.build(expression.this)
        elif expression.is_type("float"):
            size_expression = expression.find(exp.DataTypeParam)
            if size_expression:
                size = int(size_expression.name)
                expression = (
                    exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
                )

        return super().datatype_sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        sql = super().version_sql(expression)
        return sql.replace("FOR ", "", 1)
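A hedged sketch of a few of the Generator overrides defined above (arrayagg_sql and the RLIKE binary used for exp.RegexpLike), going through the top-level transpile call; the commented output is indicative and may differ between sqlglot versions:

import sqlglot

print(sqlglot.transpile("SELECT ARRAY_AGG(x), REGEXP_LIKE(y, 'a+') FROM t", write="hive")[0])
# -> something like: SELECT COLLECT_LIST(x), y RLIKE 'a+' FROM t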
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
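A hedged usage sketch: generator options documented above (pretty, identify, normalize, ...) are typically supplied through sqlglot.transpile, which forwards them to Hive.Generator rather than requiring the class to be instantiated directly:

import sqlglot

print(
    sqlglot.transpile(
        "SELECT a, COUNT(*) AS c FROM db.tbl WHERE ds = '2023-01-01' GROUP BY a",
        read="hive",
        write="hive",
        pretty=True,  # format the generated Hive SQL across multiple lines
    )[0]
)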
EXPRESSIONS_WITHOUT_NESTED_CTES =
{<class 'sqlglot.expressions.Subquery'>, <class 'sqlglot.expressions.Select'>, <class 'sqlglot.expressions.Union'>, <class 'sqlglot.expressions.Insert'>}
TYPE_MAPPING =
{<Type.NCHAR: 'NCHAR'>: 'CHAR', <Type.NVARCHAR: 'NVARCHAR'>: 'VARCHAR', <Type.MEDIUMTEXT: 'MEDIUMTEXT'>: 'TEXT', <Type.LONGTEXT: 'LONGTEXT'>: 'TEXT', <Type.TINYTEXT: 'TINYTEXT'>: 'TEXT', <Type.MEDIUMBLOB: 'MEDIUMBLOB'>: 'BLOB', <Type.LONGBLOB: 'LONGBLOB'>: 'BLOB', <Type.TINYBLOB: 'TINYBLOB'>: 'BLOB', <Type.INET: 'INET'>: 'INET', <Type.BIT: 'BIT'>: 'BOOLEAN', <Type.DATETIME: 'DATETIME'>: 'TIMESTAMP', <Type.TEXT: 'TEXT'>: 'STRING', <Type.TIME: 'TIME'>: 'TIMESTAMP', <Type.TIMESTAMPTZ: 'TIMESTAMPTZ'>: 'TIMESTAMP', <Type.VARBINARY: 'VARBINARY'>: 'BINARY'}
TRANSFORMS =
{<class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CheckColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function 
Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function _create_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 
'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SetAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function _time_to_str>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION =
{<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 
'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def temporary_storage_provider(self, expression: sqlglot.expressions.Create) -> sqlglot.expressions.Create:
def parameter_sql(self, expression: exp.Parameter) -> str:
    this = self.sql(expression, "this")
    expression_sql = self.sql(expression, "expression")

    parent = expression.parent
    this = f"{this}:{expression_sql}" if expression_sql else this

    if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
        # We need to produce SET key = value instead of SET ${key} = value
        return this

    return f"${{{this}}}"
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def datatype_sql(self, expression: exp.DataType) -> str:
    if (
        expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
        and not expression.expressions
    ):
        expression = exp.DataType.build("text")
    elif expression.this in exp.DataType.TEMPORAL_TYPES:
        expression = exp.DataType.build(expression.this)
    elif expression.is_type("float"):
        size_expression = expression.find(exp.DataTypeParam)
        if size_expression:
            size = int(size_expression.name)
            expression = (
                exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
            )

    return super().datatype_sql(expression)
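A hedged illustration of datatype_sql above: an unsized VARCHAR becomes STRING (via the TEXT mapping in TYPE_MAPPING), and FLOAT(p) is widened to DOUBLE once the precision exceeds 32. The commented output is indicative and may differ slightly by version:

import sqlglot

sql = "CREATE TABLE t (a VARCHAR, b FLOAT(10), c FLOAT(53))"
print(sqlglot.transpile(sql, write="hive")[0])
# -> something like: CREATE TABLE t (a STRING, b FLOAT, c DOUBLE)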
INVERSE_TIME_MAPPING: Dict[str, str] =
{'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict =
{'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        return not cls.case_sensitive(text)

    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or True: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
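For example (illustrative only; the "safe" result depends on how the dialect normalizes identifier case):

from sqlglot.dialects.hive import Hive

# "always" (or True) short-circuits: any text can be identified.
print(Hive.Generator.can_identify("Some Column", identify="always"))  # True

# "safe" defers to case_sensitive(), so the answer depends on the
# dialect's identifier case normalization rules.
print(Hive.Generator.can_identify("Some Column", identify="safe"))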
TOKENIZER_CLASS =
<class 'Hive.Tokenizer'>
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOG_BASE_FIRST
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_ADD_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- SUPPORTS_PARAMETERS
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- TYPED_DIVISION
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- KEY_VALUE_DEFINITONS
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_columns_sql
- star_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- formatjson_sql
- jsonobject_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql