
sqlglot.dialects.hive
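
The Hive dialect is normally used through sqlglot's top-level API rather than imported directly. A minimal usage sketch (assuming a recent sqlglot install; exact output strings may differ between versions):

    import sqlglot

    # Parse Hive SQL into an expression tree and render it back as Hive SQL.
    expression = sqlglot.parse_one(
        "SELECT GET_JSON_OBJECT(payload, '$.user.id') FROM events", read="hive"
    )
    print(expression.sql(dialect="hive"))

    # Transpile a query written for another dialect into Hive SQL.
    print(sqlglot.transpile("SELECT STRFTIME(x, '%Y-%m-%d')", read="duckdb", write="hive")[0])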

  1from __future__ import annotations
  2
  3import typing as t
  4
  5from sqlglot import exp, generator, parser, tokens, transforms
  6from sqlglot.dialects.dialect import (
  7    DATE_ADD_OR_SUB,
  8    Dialect,
  9    NormalizationStrategy,
 10    approx_count_distinct_sql,
 11    arg_max_or_min_no_count,
 12    datestrtodate_sql,
 13    build_formatted_time,
 14    if_sql,
 15    is_parse_json,
 16    left_to_substring_sql,
 17    locate_to_strposition,
 18    max_or_greatest,
 19    min_or_least,
 20    no_ilike_sql,
 21    no_recursive_cte_sql,
 22    no_safe_divide_sql,
 23    no_trycast_sql,
 24    regexp_extract_sql,
 25    regexp_replace_sql,
 26    rename_func,
 27    right_to_substring_sql,
 28    strposition_to_locate_sql,
 29    struct_extract_sql,
 30    time_format,
 31    timestrtotime_sql,
 32    var_map_sql,
 33)
 34from sqlglot.transforms import (
 35    remove_unique_constraints,
 36    ctas_with_tmp_tables_to_create_tmp_view,
 37    preprocess,
 38    move_schema_columns_to_partitioned_by,
 39)
 40from sqlglot.helper import seq_get
 41from sqlglot.tokens import TokenType
 42
 43# (FuncType, Multiplier)
 44DATE_DELTA_INTERVAL = {
 45    "YEAR": ("ADD_MONTHS", 12),
 46    "MONTH": ("ADD_MONTHS", 1),
 47    "QUARTER": ("ADD_MONTHS", 3),
 48    "WEEK": ("DATE_ADD", 7),
 49    "DAY": ("DATE_ADD", 1),
 50}
 51
 52TIME_DIFF_FACTOR = {
 53    "MILLISECOND": " * 1000",
 54    "SECOND": "",
 55    "MINUTE": " / 60",
 56    "HOUR": " / 3600",
 57}
 58
 59DIFF_MONTH_SWITCH = ("YEAR", "QUARTER", "MONTH")
 60
 61
 62def _add_date_sql(self: Hive.Generator, expression: DATE_ADD_OR_SUB) -> str:
 63    if isinstance(expression, exp.TsOrDsAdd) and not expression.unit:
 64        return self.func("DATE_ADD", expression.this, expression.expression)
 65
 66    unit = expression.text("unit").upper()
 67    func, multiplier = DATE_DELTA_INTERVAL.get(unit, ("DATE_ADD", 1))
 68
 69    if isinstance(expression, exp.DateSub):
 70        multiplier *= -1
 71
 72    if expression.expression.is_number:
 73        modified_increment = exp.Literal.number(int(expression.text("expression")) * multiplier)
 74    else:
 75        modified_increment = expression.expression
 76        if multiplier != 1:
 77            modified_increment = exp.Mul(  # type: ignore
 78                this=modified_increment, expression=exp.Literal.number(multiplier)
 79            )
 80
 81    return self.func(func, expression.this, modified_increment)
 82
 83
 84def _date_diff_sql(self: Hive.Generator, expression: exp.DateDiff | exp.TsOrDsDiff) -> str:
 85    unit = expression.text("unit").upper()
 86
 87    factor = TIME_DIFF_FACTOR.get(unit)
 88    if factor is not None:
 89        left = self.sql(expression, "this")
 90        right = self.sql(expression, "expression")
 91        sec_diff = f"UNIX_TIMESTAMP({left}) - UNIX_TIMESTAMP({right})"
 92        return f"({sec_diff}){factor}" if factor else sec_diff
 93
 94    months_between = unit in DIFF_MONTH_SWITCH
 95    sql_func = "MONTHS_BETWEEN" if months_between else "DATEDIFF"
 96    _, multiplier = DATE_DELTA_INTERVAL.get(unit, ("", 1))
 97    multiplier_sql = f" / {multiplier}" if multiplier > 1 else ""
 98    diff_sql = f"{sql_func}({self.format_args(expression.this, expression.expression)})"
 99
100    if months_between or multiplier_sql:
101        # MONTHS_BETWEEN returns a float, so we need to truncate the fractional part.
102        # For the same reason, we want to truncate if there's a divisor present.
103        diff_sql = f"CAST({diff_sql}{multiplier_sql} AS INT)"
104
105    return diff_sql
106
107
108def _json_format_sql(self: Hive.Generator, expression: exp.JSONFormat) -> str:
109    this = expression.this
110
111    if is_parse_json(this):
112        if this.this.is_string:
113            # Since FROM_JSON requires a nested type, we always wrap the json string with
114            # an array to ensure that "naked" strings like "'a'" will be handled correctly
115            wrapped_json = exp.Literal.string(f"[{this.this.name}]")
116
117            from_json = self.func(
118                "FROM_JSON", wrapped_json, self.func("SCHEMA_OF_JSON", wrapped_json)
119            )
120            to_json = self.func("TO_JSON", from_json)
121
122            # This strips the [, ] delimiters of the dummy array printed by TO_JSON
123            return self.func("REGEXP_EXTRACT", to_json, "'^.(.*).$'", "1")
124        return self.sql(this)
125
126    return self.func("TO_JSON", this, expression.args.get("options"))
127
128
129def _array_sort_sql(self: Hive.Generator, expression: exp.ArraySort) -> str:
130    if expression.expression:
131        self.unsupported("Hive SORT_ARRAY does not support a comparator")
132    return self.func("SORT_ARRAY", expression.this)
133
134
135def _property_sql(self: Hive.Generator, expression: exp.Property) -> str:
136    return f"{self.property_name(expression, string_key=True)}={self.sql(expression, 'value')}"
137
138
139def _str_to_unix_sql(self: Hive.Generator, expression: exp.StrToUnix) -> str:
140    return self.func("UNIX_TIMESTAMP", expression.this, time_format("hive")(self, expression))
141
142
143def _str_to_date_sql(self: Hive.Generator, expression: exp.StrToDate) -> str:
144    this = self.sql(expression, "this")
145    time_format = self.format_time(expression)
146    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
147        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
148    return f"CAST({this} AS DATE)"
149
150
151def _str_to_time_sql(self: Hive.Generator, expression: exp.StrToTime) -> str:
152    this = self.sql(expression, "this")
153    time_format = self.format_time(expression)
154    if time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
155        this = f"FROM_UNIXTIME(UNIX_TIMESTAMP({this}, {time_format}))"
156    return f"CAST({this} AS TIMESTAMP)"
157
158
159def _to_date_sql(self: Hive.Generator, expression: exp.TsOrDsToDate) -> str:
160    time_format = self.format_time(expression)
161    if time_format and time_format not in (Hive.TIME_FORMAT, Hive.DATE_FORMAT):
162        return self.func("TO_DATE", expression.this, time_format)
163
164    if isinstance(expression.this, exp.TsOrDsToDate):
165        return self.sql(expression, "this")
166
167    return self.func("TO_DATE", expression.this)
168
169
170def _build_with_ignore_nulls(
171    exp_class: t.Type[exp.Expression],
172) -> t.Callable[[t.List[exp.Expression]], exp.Expression]:
173    def _parse(args: t.List[exp.Expression]) -> exp.Expression:
174        this = exp_class(this=seq_get(args, 0))
175        if seq_get(args, 1) == exp.true():
176            return exp.IgnoreNulls(this=this)
177        return this
178
179    return _parse
180
181
182class Hive(Dialect):
183    ALIAS_POST_TABLESAMPLE = True
184    IDENTIFIERS_CAN_START_WITH_DIGIT = True
185    SUPPORTS_USER_DEFINED_TYPES = False
186    SAFE_DIVISION = True
187
188    # https://spark.apache.org/docs/latest/sql-ref-identifier.html#description
189    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
190
191    TIME_MAPPING = {
192        "y": "%Y",
193        "Y": "%Y",
194        "YYYY": "%Y",
195        "yyyy": "%Y",
196        "YY": "%y",
197        "yy": "%y",
198        "MMMM": "%B",
199        "MMM": "%b",
200        "MM": "%m",
201        "M": "%-m",
202        "dd": "%d",
203        "d": "%-d",
204        "HH": "%H",
205        "H": "%-H",
206        "hh": "%I",
207        "h": "%-I",
208        "mm": "%M",
209        "m": "%-M",
210        "ss": "%S",
211        "s": "%-S",
212        "SSSSSS": "%f",
213        "a": "%p",
214        "DD": "%j",
215        "D": "%-j",
216        "E": "%a",
217        "EE": "%a",
218        "EEE": "%a",
219        "EEEE": "%A",
220    }
221
222    DATE_FORMAT = "'yyyy-MM-dd'"
223    DATEINT_FORMAT = "'yyyyMMdd'"
224    TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
225
226    class Tokenizer(tokens.Tokenizer):
227        QUOTES = ["'", '"']
228        IDENTIFIERS = ["`"]
229        STRING_ESCAPES = ["\\"]
230
231        SINGLE_TOKENS = {
232            **tokens.Tokenizer.SINGLE_TOKENS,
233            "$": TokenType.PARAMETER,
234        }
235
236        KEYWORDS = {
237            **tokens.Tokenizer.KEYWORDS,
238            "ADD ARCHIVE": TokenType.COMMAND,
239            "ADD ARCHIVES": TokenType.COMMAND,
240            "ADD FILE": TokenType.COMMAND,
241            "ADD FILES": TokenType.COMMAND,
242            "ADD JAR": TokenType.COMMAND,
243            "ADD JARS": TokenType.COMMAND,
244            "MSCK REPAIR": TokenType.COMMAND,
245            "REFRESH": TokenType.REFRESH,
246            "TIMESTAMP AS OF": TokenType.TIMESTAMP_SNAPSHOT,
247            "VERSION AS OF": TokenType.VERSION_SNAPSHOT,
248            "WITH SERDEPROPERTIES": TokenType.SERDE_PROPERTIES,
249        }
250
251        NUMERIC_LITERALS = {
252            "L": "BIGINT",
253            "S": "SMALLINT",
254            "Y": "TINYINT",
255            "D": "DOUBLE",
256            "F": "FLOAT",
257            "BD": "DECIMAL",
258        }
259
260    class Parser(parser.Parser):
261        LOG_DEFAULTS_TO_LN = True
262        STRICT_CAST = False
263        VALUES_FOLLOWED_BY_PAREN = False
264
265        FUNCTIONS = {
266            **parser.Parser.FUNCTIONS,
267            "BASE64": exp.ToBase64.from_arg_list,
268            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
269            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
270            "DATE_ADD": lambda args: exp.TsOrDsAdd(
271                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
272            ),
273            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
274                [
275                    exp.TimeStrToTime(this=seq_get(args, 0)),
276                    seq_get(args, 1),
277                ]
278            ),
279            "DATE_SUB": lambda args: exp.TsOrDsAdd(
280                this=seq_get(args, 0),
281                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
282                unit=exp.Literal.string("DAY"),
283            ),
284            "DATEDIFF": lambda args: exp.DateDiff(
285                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
286                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
287            ),
288            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
289            "FIRST": _build_with_ignore_nulls(exp.First),
290            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
291            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
292            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
293            "LAST": _build_with_ignore_nulls(exp.Last),
294            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
295            "LOCATE": locate_to_strposition,
296            "MAP": parser.build_var_map,
297            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
298            "PERCENTILE": exp.Quantile.from_arg_list,
299            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
300            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
301                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
302            ),
303            "SIZE": exp.ArraySize.from_arg_list,
304            "SPLIT": exp.RegexpSplit.from_arg_list,
305            "STR_TO_MAP": lambda args: exp.StrToMap(
306                this=seq_get(args, 0),
307                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
308                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
309            ),
310            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
311            "TO_JSON": exp.JSONFormat.from_arg_list,
312            "UNBASE64": exp.FromBase64.from_arg_list,
313            "UNIX_TIMESTAMP": build_formatted_time(exp.StrToUnix, "hive", True),
314            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
315        }
316
317        NO_PAREN_FUNCTION_PARSERS = {
318            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
319            "TRANSFORM": lambda self: self._parse_transform(),
320        }
321
322        PROPERTY_PARSERS = {
323            **parser.Parser.PROPERTY_PARSERS,
324            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
325                expressions=self._parse_wrapped_csv(self._parse_property)
326            ),
327        }
328
329        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
330            if not self._match(TokenType.L_PAREN, advance=False):
331                self._retreat(self._index - 1)
332                return None
333
334            args = self._parse_wrapped_csv(self._parse_lambda)
335            row_format_before = self._parse_row_format(match_row=True)
336
337            record_writer = None
338            if self._match_text_seq("RECORDWRITER"):
339                record_writer = self._parse_string()
340
341            if not self._match(TokenType.USING):
342                return exp.Transform.from_arg_list(args)
343
344            command_script = self._parse_string()
345
346            self._match(TokenType.ALIAS)
347            schema = self._parse_schema()
348
349            row_format_after = self._parse_row_format(match_row=True)
350            record_reader = None
351            if self._match_text_seq("RECORDREADER"):
352                record_reader = self._parse_string()
353
354            return self.expression(
355                exp.QueryTransform,
356                expressions=args,
357                command_script=command_script,
358                schema=schema,
359                row_format_before=row_format_before,
360                record_writer=record_writer,
361                row_format_after=row_format_after,
362                record_reader=record_reader,
363            )
364
365        def _parse_types(
366            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
367        ) -> t.Optional[exp.Expression]:
368            """
369            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
370            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:
371
372                spark-sql (default)> select cast(1234 as varchar(2));
373                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
374                char/varchar type and simply treats them as string type. Please use string type
375                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
376                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
377
378                1234
379                Time taken: 4.265 seconds, Fetched 1 row(s)
380
381            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
382            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
383
384            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
385            """
386            this = super()._parse_types(
387                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
388            )
389
390            if this and not schema:
391                return this.transform(
392                    lambda node: (
393                        node.replace(exp.DataType.build("text"))
394                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
395                        else node
396                    ),
397                    copy=False,
398                )
399
400            return this
401
402        def _parse_partition_and_order(
403            self,
404        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
405            return (
406                (
407                    self._parse_csv(self._parse_conjunction)
408                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
409                    else []
410                ),
411                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
412            )
413
414    class Generator(generator.Generator):
415        LIMIT_FETCH = "LIMIT"
416        TABLESAMPLE_WITH_METHOD = False
417        JOIN_HINTS = False
418        TABLE_HINTS = False
419        QUERY_HINTS = False
420        INDEX_ON = "ON TABLE"
421        EXTRACT_ALLOWS_QUOTES = False
422        NVL2_SUPPORTED = False
423        LAST_DAY_SUPPORTS_DATE_PART = False
424        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
425
426        EXPRESSIONS_WITHOUT_NESTED_CTES = {
427            exp.Insert,
428            exp.Select,
429            exp.Subquery,
430            exp.Union,
431        }
432
433        SUPPORTED_JSON_PATH_PARTS = {
434            exp.JSONPathKey,
435            exp.JSONPathRoot,
436            exp.JSONPathSubscript,
437            exp.JSONPathWildcard,
438        }
439
440        TYPE_MAPPING = {
441            **generator.Generator.TYPE_MAPPING,
442            exp.DataType.Type.BIT: "BOOLEAN",
443            exp.DataType.Type.DATETIME: "TIMESTAMP",
444            exp.DataType.Type.TEXT: "STRING",
445            exp.DataType.Type.TIME: "TIMESTAMP",
446            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
447            exp.DataType.Type.VARBINARY: "BINARY",
448        }
449
450        TRANSFORMS = {
451            **generator.Generator.TRANSFORMS,
452            exp.Group: transforms.preprocess([transforms.unalias_group]),
453            exp.Select: transforms.preprocess(
454                [
455                    transforms.eliminate_qualify,
456                    transforms.eliminate_distinct_on,
457                    transforms.unnest_to_explode,
458                ]
459            ),
460            exp.Property: _property_sql,
461            exp.AnyValue: rename_func("FIRST"),
462            exp.ApproxDistinct: approx_count_distinct_sql,
463            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
464            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
465            exp.ArrayConcat: rename_func("CONCAT"),
466            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
467            exp.ArraySize: rename_func("SIZE"),
468            exp.ArraySort: _array_sort_sql,
469            exp.With: no_recursive_cte_sql,
470            exp.DateAdd: _add_date_sql,
471            exp.DateDiff: _date_diff_sql,
472            exp.DateStrToDate: datestrtodate_sql,
473            exp.DateSub: _add_date_sql,
474            exp.DateToDi: lambda self,
475            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
476            exp.DiToDate: lambda self,
477            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
478            exp.FileFormatProperty: lambda self,
479            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
480            exp.FromBase64: rename_func("UNBASE64"),
481            exp.If: if_sql(),
482            exp.ILike: no_ilike_sql,
483            exp.IsNan: rename_func("ISNAN"),
484            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
485            exp.JSONExtractScalar: lambda self, e: self.func(
486                "GET_JSON_OBJECT", e.this, e.expression
487            ),
488            exp.JSONFormat: _json_format_sql,
489            exp.Left: left_to_substring_sql,
490            exp.Map: var_map_sql,
491            exp.Max: max_or_greatest,
492            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
493            exp.Min: min_or_least,
494            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
495            exp.NotNullColumnConstraint: lambda _, e: (
496                "" if e.args.get("allow_null") else "NOT NULL"
497            ),
498            exp.VarMap: var_map_sql,
499            exp.Create: preprocess(
500                [
501                    remove_unique_constraints,
502                    ctas_with_tmp_tables_to_create_tmp_view,
503                    move_schema_columns_to_partitioned_by,
504                ]
505            ),
506            exp.Quantile: rename_func("PERCENTILE"),
507            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
508            exp.RegexpExtract: regexp_extract_sql,
509            exp.RegexpReplace: regexp_replace_sql,
510            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
511            exp.RegexpSplit: rename_func("SPLIT"),
512            exp.Right: right_to_substring_sql,
513            exp.SafeDivide: no_safe_divide_sql,
514            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
515            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
516            exp.Split: lambda self, e: self.func(
517                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
518            ),
519            exp.StrPosition: strposition_to_locate_sql,
520            exp.StrToDate: _str_to_date_sql,
521            exp.StrToTime: _str_to_time_sql,
522            exp.StrToUnix: _str_to_unix_sql,
523            exp.StructExtract: struct_extract_sql,
524            exp.TimeStrToDate: rename_func("TO_DATE"),
525            exp.TimeStrToTime: timestrtotime_sql,
526            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
527            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
528            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
529            exp.ToBase64: rename_func("BASE64"),
530            exp.TsOrDiToDi: lambda self,
531            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
532            exp.TsOrDsAdd: _add_date_sql,
533            exp.TsOrDsDiff: _date_diff_sql,
534            exp.TsOrDsToDate: _to_date_sql,
535            exp.TryCast: no_trycast_sql,
536            exp.UnixToStr: lambda self, e: self.func(
537                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
538            ),
539            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
540            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
541            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
542            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
543            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
544            exp.National: lambda self, e: self.national_sql(e, prefix=""),
545            exp.ClusteredColumnConstraint: lambda self,
546            e: f"({self.expressions(e, 'this', indent=False)})",
547            exp.NonClusteredColumnConstraint: lambda self,
548            e: f"({self.expressions(e, 'this', indent=False)})",
549            exp.NotForReplicationColumnConstraint: lambda *_: "",
550            exp.OnProperty: lambda *_: "",
551            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
552        }
553
554        PROPERTIES_LOCATION = {
555            **generator.Generator.PROPERTIES_LOCATION,
556            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
557            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
558            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
559            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
560        }
561
562        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
563            if isinstance(expression.this, exp.JSONPathWildcard):
564                self.unsupported("Unsupported wildcard in JSONPathKey expression")
565                return ""
566
567            return super()._jsonpathkey_sql(expression)
568
569        def parameter_sql(self, expression: exp.Parameter) -> str:
570            this = self.sql(expression, "this")
571            expression_sql = self.sql(expression, "expression")
572
573            parent = expression.parent
574            this = f"{this}:{expression_sql}" if expression_sql else this
575
576            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
577                # We need to produce SET key = value instead of SET ${key} = value
578                return this
579
580            return f"${{{this}}}"
581
582        def schema_sql(self, expression: exp.Schema) -> str:
583            for ordered in expression.find_all(exp.Ordered):
584                if ordered.args.get("desc") is False:
585                    ordered.set("desc", None)
586
587            return super().schema_sql(expression)
588
589        def constraint_sql(self, expression: exp.Constraint) -> str:
590            for prop in list(expression.find_all(exp.Properties)):
591                prop.pop()
592
593            this = self.sql(expression, "this")
594            expressions = self.expressions(expression, sep=" ", flat=True)
595            return f"CONSTRAINT {this} {expressions}"
596
597        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
598            serde_props = self.sql(expression, "serde_properties")
599            serde_props = f" {serde_props}" if serde_props else ""
600            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
601
602        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
603            return self.func(
604                "COLLECT_LIST",
605                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
606            )
607
608        def with_properties(self, properties: exp.Properties) -> str:
609            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))
610
611        def datatype_sql(self, expression: exp.DataType) -> str:
612            if (
613                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
614                and not expression.expressions
615            ):
616                expression = exp.DataType.build("text")
617            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
618                expression.set("this", exp.DataType.Type.VARCHAR)
619            elif expression.this in exp.DataType.TEMPORAL_TYPES:
620                expression = exp.DataType.build(expression.this)
621            elif expression.is_type("float"):
622                size_expression = expression.find(exp.DataTypeParam)
623                if size_expression:
624                    size = int(size_expression.name)
625                    expression = (
626                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
627                    )
628
629            return super().datatype_sql(expression)
630
631        def version_sql(self, expression: exp.Version) -> str:
632            sql = super().version_sql(expression)
633            return sql.replace("FOR ", "", 1)
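
To make the behavior implemented above more concrete, here is a small, hedged sketch of two of the rewrites (the _parse_types CHAR/VARCHAR handling and the DATE_DELTA_INTERVAL-driven date arithmetic); expected outputs are approximate and depend on the installed sqlglot version:

    import sqlglot

    # Casts to CHAR(n)/VARCHAR(n) are treated as casts to STRING outside of schema
    # definitions, so the length is dropped when round-tripping through Hive.
    print(sqlglot.transpile("SELECT CAST(1234 AS VARCHAR(2))", read="hive", write="hive")[0])
    # -> roughly: SELECT CAST(1234 AS STRING)

    # DATE_DELTA_INTERVAL maps units to (function, multiplier) pairs, so a WEEK
    # interval is generated as a DATE_ADD of 7 days.
    print(sqlglot.transpile("SELECT DATE_ADD(x, INTERVAL 1 WEEK)", read="mysql", write="hive")[0])
    # -> roughly: SELECT DATE_ADD(x, 7)
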
DATE_DELTA_INTERVAL = {'YEAR': ('ADD_MONTHS', 12), 'MONTH': ('ADD_MONTHS', 1), 'QUARTER': ('ADD_MONTHS', 3), 'WEEK': ('DATE_ADD', 7), 'DAY': ('DATE_ADD', 1)}
TIME_DIFF_FACTOR = {'MILLISECOND': ' * 1000', 'SECOND': '', 'MINUTE': ' / 60', 'HOUR': ' / 3600'}
DIFF_MONTH_SWITCH = ('YEAR', 'QUARTER', 'MONTH')
class Hive(sqlglot.dialects.dialect.Dialect):
ALIAS_POST_TABLESAMPLE = True

Whether the table alias comes after tablesample.

IDENTIFIERS_CAN_START_WITH_DIGIT = True

Whether an unquoted identifier can start with a digit.

SUPPORTS_USER_DEFINED_TYPES = False

Whether user-defined data types are supported.

SAFE_DIVISION = True

Whether division by zero throws an error (False) or returns NULL (True).
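
These dialect-level flags are plain class attributes and can be inspected directly; for example:

    from sqlglot.dialects.hive import Hive

    print(Hive.ALIAS_POST_TABLESAMPLE)            # True: the alias is emitted after TABLESAMPLE
    print(Hive.IDENTIFIERS_CAN_START_WITH_DIGIT)  # True: unquoted identifiers may start with a digit
    print(Hive.SUPPORTS_USER_DEFINED_TYPES)       # False
    print(Hive.SAFE_DIVISION)                     # True: division by zero yields NULL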

NORMALIZATION_STRATEGY = <NormalizationStrategy.CASE_INSENSITIVE: 'CASE_INSENSITIVE'>

Specifies the strategy according to which identifiers should be normalized.
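
Since identifiers are case-insensitive here, sqlglot's identifier normalization lowercases unquoted names for this dialect. A brief sketch using the optimizer helper (output is approximate):

    import sqlglot
    from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

    expression = sqlglot.parse_one("SELECT Col FROM SomeTable", read="hive")
    print(normalize_identifiers(expression, dialect="hive").sql(dialect="hive"))
    # -> roughly: SELECT col FROM sometable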

TIME_MAPPING: Dict[str, str] = {'y': '%Y', 'Y': '%Y', 'YYYY': '%Y', 'yyyy': '%Y', 'YY': '%y', 'yy': '%y', 'MMMM': '%B', 'MMM': '%b', 'MM': '%m', 'M': '%-m', 'dd': '%d', 'd': '%-d', 'HH': '%H', 'H': '%-H', 'hh': '%I', 'h': '%-I', 'mm': '%M', 'm': '%-M', 'ss': '%S', 's': '%-S', 'SSSSSS': '%f', 'a': '%p', 'DD': '%j', 'D': '%-j', 'E': '%a', 'EE': '%a', 'EEE': '%a', 'EEEE': '%A'}

Associates this dialect's time formats with their equivalent Python strftime formats.
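
For instance, the Hive format string 'yyyy-MM-dd' corresponds to the strftime format '%Y-%m-%d', which is how format strings get translated during transpilation. A hedged sketch (output is approximate):

    import sqlglot

    print(sqlglot.transpile("SELECT DATE_FORMAT(x, 'yyyy-MM-dd')", read="hive", write="duckdb")[0])
    # -> roughly: SELECT STRFTIME(CAST(x AS TIMESTAMP), '%Y-%m-%d')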

DATE_FORMAT = "'yyyy-MM-dd'"
DATEINT_FORMAT = "'yyyyMMdd'"
TIME_FORMAT = "'yyyy-MM-dd HH:mm:ss'"
tokenizer_class = <class 'Hive.Tokenizer'>
parser_class = <class 'Hive.Parser'>
generator_class = <class 'Hive.Generator'>
TIME_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
FORMAT_TRIE: Dict = {'y': {0: True, 'y': {'y': {'y': {0: True}}, 0: True}}, 'Y': {0: True, 'Y': {'Y': {'Y': {0: True}}, 0: True}}, 'M': {'M': {'M': {'M': {0: True}, 0: True}, 0: True}, 0: True}, 'd': {'d': {0: True}, 0: True}, 'H': {'H': {0: True}, 0: True}, 'h': {'h': {0: True}, 0: True}, 'm': {'m': {0: True}, 0: True}, 's': {'s': {0: True}, 0: True}, 'S': {'S': {'S': {'S': {'S': {'S': {0: True}}}}}}, 'a': {0: True}, 'D': {'D': {0: True}, 0: True}, 'E': {0: True, 'E': {0: True, 'E': {0: True, 'E': {0: True}}}}}
INVERSE_TIME_MAPPING: Dict[str, str] = {'%Y': 'yyyy', '%y': 'yy', '%B': 'MMMM', '%b': 'MMM', '%m': 'MM', '%-m': 'M', '%d': 'dd', '%-d': 'd', '%H': 'HH', '%-H': 'H', '%I': 'hh', '%-I': 'h', '%M': 'mm', '%-M': 'm', '%S': 'ss', '%-S': 's', '%f': 'SSSSSS', '%p': 'a', '%j': 'DD', '%-j': 'D', '%a': 'EEE', '%A': 'EEEE'}
INVERSE_TIME_TRIE: Dict = {'%': {'Y': {0: True}, 'y': {0: True}, 'B': {0: True}, 'b': {0: True}, 'm': {0: True}, '-': {'m': {0: True}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'j': {0: True}}, 'd': {0: True}, 'H': {0: True}, 'I': {0: True}, 'M': {0: True}, 'S': {0: True}, 'f': {0: True}, 'p': {0: True}, 'j': {0: True}, 'a': {0: True}, 'A': {0: True}}}
INVERSE_ESCAPE_SEQUENCES: Dict[str, str] = {}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '`'
IDENTIFIER_END = '`'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
class Hive.Tokenizer(sqlglot.tokens.Tokenizer):
QUOTES = ["'", '"']
IDENTIFIERS = ['`']
STRING_ESCAPES = ['\\']
SINGLE_TOKENS = {'(': <TokenType.L_PAREN: 'L_PAREN'>, ')': <TokenType.R_PAREN: 'R_PAREN'>, '[': <TokenType.L_BRACKET: 'L_BRACKET'>, ']': <TokenType.R_BRACKET: 'R_BRACKET'>, '{': <TokenType.L_BRACE: 'L_BRACE'>, '}': <TokenType.R_BRACE: 'R_BRACE'>, '&': <TokenType.AMP: 'AMP'>, '^': <TokenType.CARET: 'CARET'>, ':': <TokenType.COLON: 'COLON'>, ',': <TokenType.COMMA: 'COMMA'>, '.': <TokenType.DOT: 'DOT'>, '-': <TokenType.DASH: 'DASH'>, '=': <TokenType.EQ: 'EQ'>, '>': <TokenType.GT: 'GT'>, '<': <TokenType.LT: 'LT'>, '%': <TokenType.MOD: 'MOD'>, '!': <TokenType.NOT: 'NOT'>, '|': <TokenType.PIPE: 'PIPE'>, '+': <TokenType.PLUS: 'PLUS'>, ';': <TokenType.SEMICOLON: 'SEMICOLON'>, '/': <TokenType.SLASH: 'SLASH'>, '\\': <TokenType.BACKSLASH: 'BACKSLASH'>, '*': <TokenType.STAR: 'STAR'>, '~': <TokenType.TILDA: 'TILDA'>, '?': <TokenType.PLACEHOLDER: 'PLACEHOLDER'>, '@': <TokenType.PARAMETER: 'PARAMETER'>, "'": <TokenType.QUOTE: 'QUOTE'>, '`': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '"': <TokenType.IDENTIFIER: 'IDENTIFIER'>, '#': <TokenType.HASH: 'HASH'>, '$': <TokenType.PARAMETER: 'PARAMETER'>}
KEYWORDS = {'{%': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{%-': <TokenType.BLOCK_START: 'BLOCK_START'>, '%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '+%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-%}': <TokenType.BLOCK_END: 'BLOCK_END'>, '{{+': <TokenType.BLOCK_START: 'BLOCK_START'>, '{{-': <TokenType.BLOCK_START: 'BLOCK_START'>, '+}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '-}}': <TokenType.BLOCK_END: 'BLOCK_END'>, '/*+': <TokenType.HINT: 'HINT'>, '==': <TokenType.EQ: 'EQ'>, '::': <TokenType.DCOLON: 'DCOLON'>, '||': <TokenType.DPIPE: 'DPIPE'>, '>=': <TokenType.GTE: 'GTE'>, '<=': <TokenType.LTE: 'LTE'>, '<>': <TokenType.NEQ: 'NEQ'>, '!=': <TokenType.NEQ: 'NEQ'>, ':=': <TokenType.COLON_EQ: 'COLON_EQ'>, '<=>': <TokenType.NULLSAFE_EQ: 'NULLSAFE_EQ'>, '->': <TokenType.ARROW: 'ARROW'>, '->>': <TokenType.DARROW: 'DARROW'>, '=>': <TokenType.FARROW: 'FARROW'>, '#>': <TokenType.HASH_ARROW: 'HASH_ARROW'>, '#>>': <TokenType.DHASH_ARROW: 'DHASH_ARROW'>, '<->': <TokenType.LR_ARROW: 'LR_ARROW'>, '&&': <TokenType.DAMP: 'DAMP'>, '??': <TokenType.DQMARK: 'DQMARK'>, 'ALL': <TokenType.ALL: 'ALL'>, 'ALWAYS': <TokenType.ALWAYS: 'ALWAYS'>, 'AND': <TokenType.AND: 'AND'>, 'ANTI': <TokenType.ANTI: 'ANTI'>, 'ANY': <TokenType.ANY: 'ANY'>, 'ASC': <TokenType.ASC: 'ASC'>, 'AS': <TokenType.ALIAS: 'ALIAS'>, 'ASOF': <TokenType.ASOF: 'ASOF'>, 'AUTOINCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'AUTO_INCREMENT': <TokenType.AUTO_INCREMENT: 'AUTO_INCREMENT'>, 'BEGIN': <TokenType.BEGIN: 'BEGIN'>, 'BETWEEN': <TokenType.BETWEEN: 'BETWEEN'>, 'CACHE': <TokenType.CACHE: 'CACHE'>, 'UNCACHE': <TokenType.UNCACHE: 'UNCACHE'>, 'CASE': <TokenType.CASE: 'CASE'>, 'CHARACTER SET': <TokenType.CHARACTER_SET: 'CHARACTER_SET'>, 'CLUSTER BY': <TokenType.CLUSTER_BY: 'CLUSTER_BY'>, 'COLLATE': <TokenType.COLLATE: 'COLLATE'>, 'COLUMN': <TokenType.COLUMN: 'COLUMN'>, 'COMMIT': <TokenType.COMMIT: 'COMMIT'>, 'CONNECT BY': <TokenType.CONNECT_BY: 'CONNECT_BY'>, 'CONSTRAINT': <TokenType.CONSTRAINT: 'CONSTRAINT'>, 'CREATE': <TokenType.CREATE: 'CREATE'>, 'CROSS': <TokenType.CROSS: 'CROSS'>, 'CUBE': <TokenType.CUBE: 'CUBE'>, 'CURRENT_DATE': <TokenType.CURRENT_DATE: 'CURRENT_DATE'>, 'CURRENT_TIME': <TokenType.CURRENT_TIME: 'CURRENT_TIME'>, 'CURRENT_TIMESTAMP': <TokenType.CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'>, 'CURRENT_USER': <TokenType.CURRENT_USER: 'CURRENT_USER'>, 'DATABASE': <TokenType.DATABASE: 'DATABASE'>, 'DEFAULT': <TokenType.DEFAULT: 'DEFAULT'>, 'DELETE': <TokenType.DELETE: 'DELETE'>, 'DESC': <TokenType.DESC: 'DESC'>, 'DESCRIBE': <TokenType.DESCRIBE: 'DESCRIBE'>, 'DISTINCT': <TokenType.DISTINCT: 'DISTINCT'>, 'DISTRIBUTE BY': <TokenType.DISTRIBUTE_BY: 'DISTRIBUTE_BY'>, 'DIV': <TokenType.DIV: 'DIV'>, 'DROP': <TokenType.DROP: 'DROP'>, 'ELSE': <TokenType.ELSE: 'ELSE'>, 'END': <TokenType.END: 'END'>, 'ESCAPE': <TokenType.ESCAPE: 'ESCAPE'>, 'EXCEPT': <TokenType.EXCEPT: 'EXCEPT'>, 'EXECUTE': <TokenType.EXECUTE: 'EXECUTE'>, 'EXISTS': <TokenType.EXISTS: 'EXISTS'>, 'FALSE': <TokenType.FALSE: 'FALSE'>, 'FETCH': <TokenType.FETCH: 'FETCH'>, 'FILTER': <TokenType.FILTER: 'FILTER'>, 'FIRST': <TokenType.FIRST: 'FIRST'>, 'FULL': <TokenType.FULL: 'FULL'>, 'FUNCTION': <TokenType.FUNCTION: 'FUNCTION'>, 'FOR': <TokenType.FOR: 'FOR'>, 'FOREIGN KEY': <TokenType.FOREIGN_KEY: 'FOREIGN_KEY'>, 'FORMAT': <TokenType.FORMAT: 'FORMAT'>, 'FROM': <TokenType.FROM: 'FROM'>, 'GEOGRAPHY': <TokenType.GEOGRAPHY: 'GEOGRAPHY'>, 'GEOMETRY': <TokenType.GEOMETRY: 'GEOMETRY'>, 'GLOB': <TokenType.GLOB: 'GLOB'>, 'GROUP BY': <TokenType.GROUP_BY: 
'GROUP_BY'>, 'GROUPING SETS': <TokenType.GROUPING_SETS: 'GROUPING_SETS'>, 'HAVING': <TokenType.HAVING: 'HAVING'>, 'ILIKE': <TokenType.ILIKE: 'ILIKE'>, 'IN': <TokenType.IN: 'IN'>, 'INDEX': <TokenType.INDEX: 'INDEX'>, 'INET': <TokenType.INET: 'INET'>, 'INNER': <TokenType.INNER: 'INNER'>, 'INSERT': <TokenType.INSERT: 'INSERT'>, 'INTERVAL': <TokenType.INTERVAL: 'INTERVAL'>, 'INTERSECT': <TokenType.INTERSECT: 'INTERSECT'>, 'INTO': <TokenType.INTO: 'INTO'>, 'IS': <TokenType.IS: 'IS'>, 'ISNULL': <TokenType.ISNULL: 'ISNULL'>, 'JOIN': <TokenType.JOIN: 'JOIN'>, 'KEEP': <TokenType.KEEP: 'KEEP'>, 'KILL': <TokenType.KILL: 'KILL'>, 'LATERAL': <TokenType.LATERAL: 'LATERAL'>, 'LEFT': <TokenType.LEFT: 'LEFT'>, 'LIKE': <TokenType.LIKE: 'LIKE'>, 'LIMIT': <TokenType.LIMIT: 'LIMIT'>, 'LOAD': <TokenType.LOAD: 'LOAD'>, 'LOCK': <TokenType.LOCK: 'LOCK'>, 'MERGE': <TokenType.MERGE: 'MERGE'>, 'NATURAL': <TokenType.NATURAL: 'NATURAL'>, 'NEXT': <TokenType.NEXT: 'NEXT'>, 'NOT': <TokenType.NOT: 'NOT'>, 'NOTNULL': <TokenType.NOTNULL: 'NOTNULL'>, 'NULL': <TokenType.NULL: 'NULL'>, 'OBJECT': <TokenType.OBJECT: 'OBJECT'>, 'OFFSET': <TokenType.OFFSET: 'OFFSET'>, 'ON': <TokenType.ON: 'ON'>, 'OR': <TokenType.OR: 'OR'>, 'XOR': <TokenType.XOR: 'XOR'>, 'ORDER BY': <TokenType.ORDER_BY: 'ORDER_BY'>, 'ORDINALITY': <TokenType.ORDINALITY: 'ORDINALITY'>, 'OUTER': <TokenType.OUTER: 'OUTER'>, 'OVER': <TokenType.OVER: 'OVER'>, 'OVERLAPS': <TokenType.OVERLAPS: 'OVERLAPS'>, 'OVERWRITE': <TokenType.OVERWRITE: 'OVERWRITE'>, 'PARTITION': <TokenType.PARTITION: 'PARTITION'>, 'PARTITION BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PARTITIONED_BY': <TokenType.PARTITION_BY: 'PARTITION_BY'>, 'PERCENT': <TokenType.PERCENT: 'PERCENT'>, 'PIVOT': <TokenType.PIVOT: 'PIVOT'>, 'PRAGMA': <TokenType.PRAGMA: 'PRAGMA'>, 'PRIMARY KEY': <TokenType.PRIMARY_KEY: 'PRIMARY_KEY'>, 'PROCEDURE': <TokenType.PROCEDURE: 'PROCEDURE'>, 'QUALIFY': <TokenType.QUALIFY: 'QUALIFY'>, 'RANGE': <TokenType.RANGE: 'RANGE'>, 'RECURSIVE': <TokenType.RECURSIVE: 'RECURSIVE'>, 'REGEXP': <TokenType.RLIKE: 'RLIKE'>, 'REPLACE': <TokenType.REPLACE: 'REPLACE'>, 'RETURNING': <TokenType.RETURNING: 'RETURNING'>, 'REFERENCES': <TokenType.REFERENCES: 'REFERENCES'>, 'RIGHT': <TokenType.RIGHT: 'RIGHT'>, 'RLIKE': <TokenType.RLIKE: 'RLIKE'>, 'ROLLBACK': <TokenType.ROLLBACK: 'ROLLBACK'>, 'ROLLUP': <TokenType.ROLLUP: 'ROLLUP'>, 'ROW': <TokenType.ROW: 'ROW'>, 'ROWS': <TokenType.ROWS: 'ROWS'>, 'SCHEMA': <TokenType.SCHEMA: 'SCHEMA'>, 'SELECT': <TokenType.SELECT: 'SELECT'>, 'SEMI': <TokenType.SEMI: 'SEMI'>, 'SET': <TokenType.SET: 'SET'>, 'SETTINGS': <TokenType.SETTINGS: 'SETTINGS'>, 'SHOW': <TokenType.SHOW: 'SHOW'>, 'SIMILAR TO': <TokenType.SIMILAR_TO: 'SIMILAR_TO'>, 'SOME': <TokenType.SOME: 'SOME'>, 'SORT BY': <TokenType.SORT_BY: 'SORT_BY'>, 'START WITH': <TokenType.START_WITH: 'START_WITH'>, 'TABLE': <TokenType.TABLE: 'TABLE'>, 'TABLESAMPLE': <TokenType.TABLE_SAMPLE: 'TABLE_SAMPLE'>, 'TEMP': <TokenType.TEMPORARY: 'TEMPORARY'>, 'TEMPORARY': <TokenType.TEMPORARY: 'TEMPORARY'>, 'THEN': <TokenType.THEN: 'THEN'>, 'TRUE': <TokenType.TRUE: 'TRUE'>, 'UNION': <TokenType.UNION: 'UNION'>, 'UNKNOWN': <TokenType.UNKNOWN: 'UNKNOWN'>, 'UNNEST': <TokenType.UNNEST: 'UNNEST'>, 'UNPIVOT': <TokenType.UNPIVOT: 'UNPIVOT'>, 'UPDATE': <TokenType.UPDATE: 'UPDATE'>, 'USE': <TokenType.USE: 'USE'>, 'USING': <TokenType.USING: 'USING'>, 'UUID': <TokenType.UUID: 'UUID'>, 'VALUES': <TokenType.VALUES: 'VALUES'>, 'VIEW': <TokenType.VIEW: 'VIEW'>, 'VOLATILE': <TokenType.VOLATILE: 
'VOLATILE'>, 'WHEN': <TokenType.WHEN: 'WHEN'>, 'WHERE': <TokenType.WHERE: 'WHERE'>, 'WINDOW': <TokenType.WINDOW: 'WINDOW'>, 'WITH': <TokenType.WITH: 'WITH'>, 'APPLY': <TokenType.APPLY: 'APPLY'>, 'ARRAY': <TokenType.ARRAY: 'ARRAY'>, 'BIT': <TokenType.BIT: 'BIT'>, 'BOOL': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BOOLEAN': <TokenType.BOOLEAN: 'BOOLEAN'>, 'BYTE': <TokenType.TINYINT: 'TINYINT'>, 'MEDIUMINT': <TokenType.MEDIUMINT: 'MEDIUMINT'>, 'INT1': <TokenType.TINYINT: 'TINYINT'>, 'TINYINT': <TokenType.TINYINT: 'TINYINT'>, 'INT16': <TokenType.SMALLINT: 'SMALLINT'>, 'SHORT': <TokenType.SMALLINT: 'SMALLINT'>, 'SMALLINT': <TokenType.SMALLINT: 'SMALLINT'>, 'INT128': <TokenType.INT128: 'INT128'>, 'HUGEINT': <TokenType.INT128: 'INT128'>, 'INT2': <TokenType.SMALLINT: 'SMALLINT'>, 'INTEGER': <TokenType.INT: 'INT'>, 'INT': <TokenType.INT: 'INT'>, 'INT4': <TokenType.INT: 'INT'>, 'INT32': <TokenType.INT: 'INT'>, 'INT64': <TokenType.BIGINT: 'BIGINT'>, 'LONG': <TokenType.BIGINT: 'BIGINT'>, 'BIGINT': <TokenType.BIGINT: 'BIGINT'>, 'INT8': <TokenType.TINYINT: 'TINYINT'>, 'DEC': <TokenType.DECIMAL: 'DECIMAL'>, 'DECIMAL': <TokenType.DECIMAL: 'DECIMAL'>, 'BIGDECIMAL': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'BIGNUMERIC': <TokenType.BIGDECIMAL: 'BIGDECIMAL'>, 'MAP': <TokenType.MAP: 'MAP'>, 'NULLABLE': <TokenType.NULLABLE: 'NULLABLE'>, 'NUMBER': <TokenType.DECIMAL: 'DECIMAL'>, 'NUMERIC': <TokenType.DECIMAL: 'DECIMAL'>, 'FIXED': <TokenType.DECIMAL: 'DECIMAL'>, 'REAL': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT4': <TokenType.FLOAT: 'FLOAT'>, 'FLOAT8': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE': <TokenType.DOUBLE: 'DOUBLE'>, 'DOUBLE PRECISION': <TokenType.DOUBLE: 'DOUBLE'>, 'JSON': <TokenType.JSON: 'JSON'>, 'CHAR': <TokenType.CHAR: 'CHAR'>, 'CHARACTER': <TokenType.CHAR: 'CHAR'>, 'NCHAR': <TokenType.NCHAR: 'NCHAR'>, 'VARCHAR': <TokenType.VARCHAR: 'VARCHAR'>, 'VARCHAR2': <TokenType.VARCHAR: 'VARCHAR'>, 'NVARCHAR': <TokenType.NVARCHAR: 'NVARCHAR'>, 'NVARCHAR2': <TokenType.NVARCHAR: 'NVARCHAR'>, 'BPCHAR': <TokenType.BPCHAR: 'BPCHAR'>, 'STR': <TokenType.TEXT: 'TEXT'>, 'STRING': <TokenType.TEXT: 'TEXT'>, 'TEXT': <TokenType.TEXT: 'TEXT'>, 'LONGTEXT': <TokenType.LONGTEXT: 'LONGTEXT'>, 'MEDIUMTEXT': <TokenType.MEDIUMTEXT: 'MEDIUMTEXT'>, 'TINYTEXT': <TokenType.TINYTEXT: 'TINYTEXT'>, 'CLOB': <TokenType.TEXT: 'TEXT'>, 'LONGVARCHAR': <TokenType.TEXT: 'TEXT'>, 'BINARY': <TokenType.BINARY: 'BINARY'>, 'BLOB': <TokenType.VARBINARY: 'VARBINARY'>, 'LONGBLOB': <TokenType.LONGBLOB: 'LONGBLOB'>, 'MEDIUMBLOB': <TokenType.MEDIUMBLOB: 'MEDIUMBLOB'>, 'TINYBLOB': <TokenType.TINYBLOB: 'TINYBLOB'>, 'BYTEA': <TokenType.VARBINARY: 'VARBINARY'>, 'VARBINARY': <TokenType.VARBINARY: 'VARBINARY'>, 'TIME': <TokenType.TIME: 'TIME'>, 'TIMETZ': <TokenType.TIMETZ: 'TIMETZ'>, 'TIMESTAMP': <TokenType.TIMESTAMP: 'TIMESTAMP'>, 'TIMESTAMPTZ': <TokenType.TIMESTAMPTZ: 'TIMESTAMPTZ'>, 'TIMESTAMPLTZ': <TokenType.TIMESTAMPLTZ: 'TIMESTAMPLTZ'>, 'DATE': <TokenType.DATE: 'DATE'>, 'DATETIME': <TokenType.DATETIME: 'DATETIME'>, 'INT4RANGE': <TokenType.INT4RANGE: 'INT4RANGE'>, 'INT4MULTIRANGE': <TokenType.INT4MULTIRANGE: 'INT4MULTIRANGE'>, 'INT8RANGE': <TokenType.INT8RANGE: 'INT8RANGE'>, 'INT8MULTIRANGE': <TokenType.INT8MULTIRANGE: 'INT8MULTIRANGE'>, 'NUMRANGE': <TokenType.NUMRANGE: 'NUMRANGE'>, 'NUMMULTIRANGE': <TokenType.NUMMULTIRANGE: 'NUMMULTIRANGE'>, 'TSRANGE': <TokenType.TSRANGE: 'TSRANGE'>, 'TSMULTIRANGE': <TokenType.TSMULTIRANGE: 'TSMULTIRANGE'>, 'TSTZRANGE': <TokenType.TSTZRANGE: 'TSTZRANGE'>, 'TSTZMULTIRANGE': <TokenType.TSTZMULTIRANGE: 
'TSTZMULTIRANGE'>, 'DATERANGE': <TokenType.DATERANGE: 'DATERANGE'>, 'DATEMULTIRANGE': <TokenType.DATEMULTIRANGE: 'DATEMULTIRANGE'>, 'UNIQUE': <TokenType.UNIQUE: 'UNIQUE'>, 'STRUCT': <TokenType.STRUCT: 'STRUCT'>, 'VARIANT': <TokenType.VARIANT: 'VARIANT'>, 'ALTER': <TokenType.ALTER: 'ALTER'>, 'ANALYZE': <TokenType.COMMAND: 'COMMAND'>, 'CALL': <TokenType.COMMAND: 'COMMAND'>, 'COMMENT': <TokenType.COMMENT: 'COMMENT'>, 'COPY': <TokenType.COMMAND: 'COMMAND'>, 'EXPLAIN': <TokenType.COMMAND: 'COMMAND'>, 'GRANT': <TokenType.COMMAND: 'COMMAND'>, 'OPTIMIZE': <TokenType.COMMAND: 'COMMAND'>, 'PREPARE': <TokenType.COMMAND: 'COMMAND'>, 'TRUNCATE': <TokenType.COMMAND: 'COMMAND'>, 'VACUUM': <TokenType.COMMAND: 'COMMAND'>, 'USER-DEFINED': <TokenType.USERDEFINED: 'USERDEFINED'>, 'FOR VERSION': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'FOR TIMESTAMP': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'ADD ARCHIVE': <TokenType.COMMAND: 'COMMAND'>, 'ADD ARCHIVES': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILE': <TokenType.COMMAND: 'COMMAND'>, 'ADD FILES': <TokenType.COMMAND: 'COMMAND'>, 'ADD JAR': <TokenType.COMMAND: 'COMMAND'>, 'ADD JARS': <TokenType.COMMAND: 'COMMAND'>, 'MSCK REPAIR': <TokenType.COMMAND: 'COMMAND'>, 'REFRESH': <TokenType.REFRESH: 'REFRESH'>, 'TIMESTAMP AS OF': <TokenType.TIMESTAMP_SNAPSHOT: 'TIMESTAMP_SNAPSHOT'>, 'VERSION AS OF': <TokenType.VERSION_SNAPSHOT: 'VERSION_SNAPSHOT'>, 'WITH SERDEPROPERTIES': <TokenType.SERDE_PROPERTIES: 'SERDE_PROPERTIES'>}
NUMERIC_LITERALS = {'L': 'BIGINT', 'S': 'SMALLINT', 'Y': 'TINYINT', 'D': 'DOUBLE', 'F': 'FLOAT', 'BD': 'DECIMAL'}
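
NUMERIC_LITERALS maps Hive's typed numeric literal suffixes (10L, 2S, 1Y, 1.5D, 1.5F, 2.5BD) to their SQL types. A minimal sketch of how this surfaces through the public API; the exact rendered SQL may vary between sqlglot versions:

    import sqlglot

    # With the Hive dialect, a suffixed literal such as 10L is tokenized using
    # NUMERIC_LITERALS and ends up as an explicit cast in the syntax tree.
    expression = sqlglot.parse_one("SELECT 10L, 2.5BD", read="hive")

    # Rendering it back is expected to spell the suffix out as a cast,
    # e.g. SELECT CAST(10 AS BIGINT), CAST(2.5 AS DECIMAL).
    print(expression.sql(dialect="hive"))
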
class Hive.Parser(sqlglot.parser.Parser):
261    class Parser(parser.Parser):
262        LOG_DEFAULTS_TO_LN = True
263        STRICT_CAST = False
264        VALUES_FOLLOWED_BY_PAREN = False
265
266        FUNCTIONS = {
267            **parser.Parser.FUNCTIONS,
268            "BASE64": exp.ToBase64.from_arg_list,
269            "COLLECT_LIST": exp.ArrayAgg.from_arg_list,
270            "COLLECT_SET": exp.ArrayUniqueAgg.from_arg_list,
271            "DATE_ADD": lambda args: exp.TsOrDsAdd(
272                this=seq_get(args, 0), expression=seq_get(args, 1), unit=exp.Literal.string("DAY")
273            ),
274            "DATE_FORMAT": lambda args: build_formatted_time(exp.TimeToStr, "hive")(
275                [
276                    exp.TimeStrToTime(this=seq_get(args, 0)),
277                    seq_get(args, 1),
278                ]
279            ),
280            "DATE_SUB": lambda args: exp.TsOrDsAdd(
281                this=seq_get(args, 0),
282                expression=exp.Mul(this=seq_get(args, 1), expression=exp.Literal.number(-1)),
283                unit=exp.Literal.string("DAY"),
284            ),
285            "DATEDIFF": lambda args: exp.DateDiff(
286                this=exp.TsOrDsToDate(this=seq_get(args, 0)),
287                expression=exp.TsOrDsToDate(this=seq_get(args, 1)),
288            ),
289            "DAY": lambda args: exp.Day(this=exp.TsOrDsToDate(this=seq_get(args, 0))),
290            "FIRST": _build_with_ignore_nulls(exp.First),
291            "FIRST_VALUE": _build_with_ignore_nulls(exp.FirstValue),
292            "FROM_UNIXTIME": build_formatted_time(exp.UnixToStr, "hive", True),
293            "GET_JSON_OBJECT": exp.JSONExtractScalar.from_arg_list,
294            "LAST": _build_with_ignore_nulls(exp.Last),
295            "LAST_VALUE": _build_with_ignore_nulls(exp.LastValue),
296            "LOCATE": locate_to_strposition,
297            "MAP": parser.build_var_map,
298            "MONTH": lambda args: exp.Month(this=exp.TsOrDsToDate.from_arg_list(args)),
299            "PERCENTILE": exp.Quantile.from_arg_list,
300            "PERCENTILE_APPROX": exp.ApproxQuantile.from_arg_list,
301            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
302                this=seq_get(args, 0), expression=seq_get(args, 1), group=seq_get(args, 2)
303            ),
304            "SIZE": exp.ArraySize.from_arg_list,
305            "SPLIT": exp.RegexpSplit.from_arg_list,
306            "STR_TO_MAP": lambda args: exp.StrToMap(
307                this=seq_get(args, 0),
308                pair_delim=seq_get(args, 1) or exp.Literal.string(","),
309                key_value_delim=seq_get(args, 2) or exp.Literal.string(":"),
310            ),
311            "TO_DATE": build_formatted_time(exp.TsOrDsToDate, "hive"),
312            "TO_JSON": exp.JSONFormat.from_arg_list,
313            "UNBASE64": exp.FromBase64.from_arg_list,
314            "UNIX_TIMESTAMP": build_formatted_time(exp.StrToUnix, "hive", True),
315            "YEAR": lambda args: exp.Year(this=exp.TsOrDsToDate.from_arg_list(args)),
316        }
317
318        NO_PAREN_FUNCTION_PARSERS = {
319            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
320            "TRANSFORM": lambda self: self._parse_transform(),
321        }
322
323        PROPERTY_PARSERS = {
324            **parser.Parser.PROPERTY_PARSERS,
325            "WITH SERDEPROPERTIES": lambda self: exp.SerdeProperties(
326                expressions=self._parse_wrapped_csv(self._parse_property)
327            ),
328        }
329
330        def _parse_transform(self) -> t.Optional[exp.Transform | exp.QueryTransform]:
331            if not self._match(TokenType.L_PAREN, advance=False):
332                self._retreat(self._index - 1)
333                return None
334
335            args = self._parse_wrapped_csv(self._parse_lambda)
336            row_format_before = self._parse_row_format(match_row=True)
337
338            record_writer = None
339            if self._match_text_seq("RECORDWRITER"):
340                record_writer = self._parse_string()
341
342            if not self._match(TokenType.USING):
343                return exp.Transform.from_arg_list(args)
344
345            command_script = self._parse_string()
346
347            self._match(TokenType.ALIAS)
348            schema = self._parse_schema()
349
350            row_format_after = self._parse_row_format(match_row=True)
351            record_reader = None
352            if self._match_text_seq("RECORDREADER"):
353                record_reader = self._parse_string()
354
355            return self.expression(
356                exp.QueryTransform,
357                expressions=args,
358                command_script=command_script,
359                schema=schema,
360                row_format_before=row_format_before,
361                record_writer=record_writer,
362                row_format_after=row_format_after,
363                record_reader=record_reader,
364            )
365
366        def _parse_types(
367            self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
368        ) -> t.Optional[exp.Expression]:
369            """
370            Spark (and most likely Hive) treats casts to CHAR(length) and VARCHAR(length) as casts to
371            STRING in all contexts except for schema definitions. For example, this is in Spark v3.4.0:
372
373                spark-sql (default)> select cast(1234 as varchar(2));
374                23/06/06 15:51:18 WARN CharVarcharUtils: The Spark cast operator does not support
375                char/varchar type and simply treats them as string type. Please use string type
376                directly to avoid confusion. Otherwise, you can set spark.sql.legacy.charVarcharAsString
377                to true, so that Spark treat them as string type as same as Spark 3.0 and earlier
378
379                1234
380                Time taken: 4.265 seconds, Fetched 1 row(s)
381
382            This shows that Spark doesn't truncate the value into '12', which is inconsistent with
383            what other dialects (e.g. postgres) do, so we need to drop the length to transpile correctly.
384
385            Reference: https://spark.apache.org/docs/latest/sql-ref-datatypes.html
386            """
387            this = super()._parse_types(
388                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
389            )
390
391            if this and not schema:
392                return this.transform(
393                    lambda node: (
394                        node.replace(exp.DataType.build("text"))
395                        if isinstance(node, exp.DataType) and node.is_type("char", "varchar")
396                        else node
397                    ),
398                    copy=False,
399                )
400
401            return this
402
403        def _parse_partition_and_order(
404            self,
405        ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
406            return (
407                (
408                    self._parse_csv(self._parse_conjunction)
409                    if self._match_set({TokenType.PARTITION_BY, TokenType.DISTRIBUTE_BY})
410                    else []
411                ),
412                super()._parse_order(skip_order_token=self._match(TokenType.SORT_BY)),
413            )
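
As described in the _parse_types docstring above, casts to CHAR(length) and VARCHAR(length) are parsed as STRING outside of schema definitions, while column definitions keep their declared type. A small illustration of that behavior (table and column names are made up, and the exact output may differ between sqlglot versions):

    import sqlglot

    # Outside a schema definition the length is dropped and the type becomes STRING,
    # mirroring Spark/Hive semantics where CAST(... AS VARCHAR(2)) does not truncate.
    print(sqlglot.transpile("SELECT CAST(col AS VARCHAR(2)) FROM t", read="hive", write="hive")[0])
    # expected: SELECT CAST(col AS STRING) FROM t

    # Inside a schema definition (schema=True in _parse_types) the type is preserved.
    print(sqlglot.transpile("CREATE TABLE t (col VARCHAR(2))", read="hive", write="hive")[0])
    # expected: CREATE TABLE t (col VARCHAR(2))
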

Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

Arguments:
  • error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
  • error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
  • max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
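
In practice Hive.Parser is reached through the dialect name rather than instantiated directly. A brief, hedged sketch (the query and identifiers are made up):

    import sqlglot
    from sqlglot import exp

    # read="hive" routes the query through Hive.Tokenizer and Hive.Parser.
    ast = sqlglot.parse_one("SELECT GET_JSON_OBJECT(payload, '$.id') FROM events", read="hive")

    # Hive-specific functions are normalized to dialect-agnostic expressions; per the
    # FUNCTIONS mapping below, GET_JSON_OBJECT parses into exp.JSONExtractScalar.
    print(ast.find(exp.JSONExtractScalar) is not None)  # True

    # The tree can then be re-rendered with any other dialect's generator.
    print(ast.sql(dialect="presto"))
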
LOG_DEFAULTS_TO_LN = True
STRICT_CAST = False
VALUES_FOLLOWED_BY_PAREN = False
FUNCTIONS = {'ABS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Abs'>>, 'ANONYMOUS_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnonymousAggFunc'>>, 'ANY_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.AnyValue'>>, 'APPROX_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_COUNT_DISTINCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxDistinct'>>, 'APPROX_QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'APPROX_TOP_K': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxTopK'>>, 'ARG_MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARGMAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'MAX_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMax'>>, 'ARG_MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARGMIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'MIN_BY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArgMin'>>, 'ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Array'>>, 'ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'ARRAY_ALL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAll'>>, 'ARRAY_ANY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAny'>>, 'ARRAY_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayConcat'>>, 'ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayContains'>>, 'FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_FILTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayFilter'>>, 'ARRAY_JOIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayJoin'>>, 'ARRAY_OVERLAPS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayOverlaps'>>, 'ARRAY_SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_LENGTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'ARRAY_SORT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySort'>>, 'ARRAY_SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySum'>>, 'ARRAY_UNION_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUnionAgg'>>, 'ARRAY_UNIQUE_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'AVG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Avg'>>, 'CASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Case'>>, 'CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cast'>>, 'CAST_TO_STR_TYPE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CastToStrType'>>, 'CBRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Cbrt'>>, 'CEIL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CEILING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ceil'>>, 'CHR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 'CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Chr'>>, 
'COALESCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'IFNULL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'NVL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Coalesce'>>, 'COLLATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Collate'>>, 'COMBINED_AGG_FUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedAggFunc'>>, 'COMBINED_PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CombinedParameterizedAgg'>>, 'CONCAT': <function Parser.<lambda>>, 'CONCAT_WS': <function Parser.<lambda>>, 'COUNT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Count'>>, 'COUNT_IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'COUNTIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CountIf'>>, 'CURRENT_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDate'>>, 'CURRENT_DATETIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentDatetime'>>, 'CURRENT_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTime'>>, 'CURRENT_TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentTimestamp'>>, 'CURRENT_USER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.CurrentUser'>>, 'DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Date'>>, 'DATE_ADD': <function Hive.Parser.<lambda>>, 'DATEDIFF': <function Hive.Parser.<lambda>>, 'DATE_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateDiff'>>, 'DATE_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateFromParts'>>, 'DATE_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateStrToDate'>>, 'DATE_SUB': <function Hive.Parser.<lambda>>, 'DATE_TO_DATE_STR': <function Parser.<lambda>>, 'DATE_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateToDi'>>, 'DATE_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DateTrunc'>>, 'DATETIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeAdd'>>, 'DATETIME_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeDiff'>>, 'DATETIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeSub'>>, 'DATETIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DatetimeTrunc'>>, 'DAY': <function Hive.Parser.<lambda>>, 'DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAYOFMONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfMonth'>>, 'DAY_OF_WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAYOFWEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfWeek'>>, 'DAY_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DAYOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DayOfYear'>>, 'DECODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Decode'>>, 'DI_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.DiToDate'>>, 'ENCODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Encode'>>, 'EXP': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Exp'>>, 'EXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Explode'>>, 'EXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ExplodeOuter'>>, 'EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Extract'>>, 'FIRST': <function _build_with_ignore_nulls.<locals>._parse>, 'FIRST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'FLATTEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Flatten'>>, 'FLOOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Floor'>>, 'FROM_BASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase'>>, 'FROM_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'GENERATE_SERIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GenerateSeries'>>, 'GREATEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Greatest'>>, 'GROUP_CONCAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.GroupConcat'>>, 'HEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hex'>>, 'HLL': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Hll'>>, 'IF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'IIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.If'>>, 'INITCAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Initcap'>>, 'IS_INF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'ISINF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsInf'>>, 'IS_NAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'ISNAN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.IsNan'>>, 'J_S_O_N_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArray'>>, 'J_S_O_N_ARRAY_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayAgg'>>, 'JSON_ARRAY_CONTAINS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONArrayContains'>>, 'JSONB_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtract'>>, 'JSONB_EXTRACT_SCALAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONBExtractScalar'>>, 'JSON_EXTRACT': <function build_extract_json_with_path.<locals>._builder>, 'JSON_EXTRACT_SCALAR': <function build_extract_json_with_path.<locals>._builder>, 'JSON_FORMAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'J_S_O_N_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObject'>>, 'J_S_O_N_OBJECT_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONObjectAgg'>>, 'J_S_O_N_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONTable'>>, 'LAG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lag'>>, 'LAST': <function _build_with_ignore_nulls.<locals>._parse>, 'LAST_DAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_DAY_OF_MONTH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LastDay'>>, 'LAST_VALUE': <function _build_with_ignore_nulls.<locals>._parse>, 'LEAD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lead'>>, 'LEAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Least'>>, 'LEFT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Left'>>, 'LENGTH': <bound method Func.from_arg_list of 
<class 'sqlglot.expressions.Length'>>, 'LEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Length'>>, 'LEVENSHTEIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Levenshtein'>>, 'LN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Ln'>>, 'LOG': <function build_logarithm>, 'LOG10': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log10'>>, 'LOG2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Log2'>>, 'LOGICAL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOL_AND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'BOOLAND_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalAnd'>>, 'LOGICAL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOL_OR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'BOOLOR_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.LogicalOr'>>, 'LOWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'LCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Lower'>>, 'MD5': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5'>>, 'MD5_DIGEST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MD5Digest'>>, 'MAP': <function build_var_map>, 'MAP_FROM_ENTRIES': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MapFromEntries'>>, 'MATCH_AGAINST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MatchAgainst'>>, 'MAX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Max'>>, 'MIN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Min'>>, 'MONTH': <function Hive.Parser.<lambda>>, 'MONTHS_BETWEEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.MonthsBetween'>>, 'NEXT_VALUE_FOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NextValueFor'>>, 'NTH_VALUE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NthValue'>>, 'NULLIF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nullif'>>, 'NUMBER_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.NumberToStr'>>, 'NVL2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Nvl2'>>, 'OPEN_J_S_O_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.OpenJSON'>>, 'PARAMETERIZED_AGG': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParameterizedAgg'>>, 'PARSE_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'JSON_PARSE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ParseJSON'>>, 'PERCENTILE_CONT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileCont'>>, 'PERCENTILE_DISC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PercentileDisc'>>, 'POSEXPLODE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Posexplode'>>, 'POSEXPLODE_OUTER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.PosexplodeOuter'>>, 'POWER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'POW': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Pow'>>, 'PREDICT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Predict'>>, 'QUANTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'RAND': <bound method Func.from_arg_list of <class 
'sqlglot.expressions.Rand'>>, 'RANDOM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Rand'>>, 'RANDN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Randn'>>, 'RANGE_N': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RangeN'>>, 'READ_CSV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ReadCSV'>>, 'REDUCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Reduce'>>, 'REGEXP_EXTRACT': <function Hive.Parser.<lambda>>, 'REGEXP_I_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpILike'>>, 'REGEXP_LIKE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpLike'>>, 'REGEXP_REPLACE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpReplace'>>, 'REGEXP_SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'REPEAT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Repeat'>>, 'RIGHT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Right'>>, 'ROUND': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Round'>>, 'ROW_NUMBER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RowNumber'>>, 'SHA': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA1': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA'>>, 'SHA2': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SHA2'>>, 'SAFE_DIVIDE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SafeDivide'>>, 'SORT_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.SortArray'>>, 'SPLIT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.RegexpSplit'>>, 'SQRT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sqrt'>>, 'STANDARD_HASH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StandardHash'>>, 'STAR_MAP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StarMap'>>, 'STARTS_WITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STARTSWITH': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StartsWith'>>, 'STDDEV': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stddev'>>, 'STDDEV_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevPop'>>, 'STDDEV_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StddevSamp'>>, 'STR_POSITION': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrPosition'>>, 'STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToDate'>>, 'STR_TO_MAP': <function Hive.Parser.<lambda>>, 'STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToTime'>>, 'STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StrToUnix'>>, 'STRUCT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Struct'>>, 'STRUCT_EXTRACT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.StructExtract'>>, 'STUFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'INSERT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Stuff'>>, 'SUBSTRING': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Substring'>>, 'SUM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Sum'>>, 'TIME_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeAdd'>>, 'TIME_DIFF': <bound method Func.from_arg_list 
of <class 'sqlglot.expressions.TimeDiff'>>, 'TIME_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIMEFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeFromParts'>>, 'TIME_STR_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToDate'>>, 'TIME_STR_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToTime'>>, 'TIME_STR_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeStrToUnix'>>, 'TIME_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeSub'>>, 'TIME_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToStr'>>, 'TIME_TO_TIME_STR': <function Parser.<lambda>>, 'TIME_TO_UNIX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeToUnix'>>, 'TIME_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimeTrunc'>>, 'TIMESTAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Timestamp'>>, 'TIMESTAMP_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampAdd'>>, 'TIMESTAMPDIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampDiff'>>, 'TIMESTAMP_FROM_PARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMPFROMPARTS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampFromParts'>>, 'TIMESTAMP_SUB': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampSub'>>, 'TIMESTAMP_TRUNC': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TimestampTrunc'>>, 'TO_ARRAY': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToArray'>>, 'TO_BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'TO_CHAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToChar'>>, 'TO_DAYS': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToDays'>>, 'TRANSFORM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Transform'>>, 'TRIM': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Trim'>>, 'TRY_CAST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TryCast'>>, 'TS_OR_DI_TO_DI': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDiToDi'>>, 'TS_OR_DS_ADD': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsAdd'>>, 'TS_OR_DS_DIFF': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsDiff'>>, 'TS_OR_DS_TO_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToDate'>>, 'TS_OR_DS_TO_DATE_STR': <function Parser.<lambda>>, 'TS_OR_DS_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.TsOrDsToTime'>>, 'UNHEX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Unhex'>>, 'UNIX_DATE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixDate'>>, 'UNIX_TO_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToStr'>>, 'UNIX_TO_TIME': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTime'>>, 'UNIX_TO_TIME_STR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.UnixToTimeStr'>>, 'UPPER': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 'UCASE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Upper'>>, 
'VAR_MAP': <function build_var_map>, 'VARIANCE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VAR_SAMP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Variance'>>, 'VARIANCE_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'VAR_POP': <bound method Func.from_arg_list of <class 'sqlglot.expressions.VariancePop'>>, 'WEEK': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Week'>>, 'WEEK_OF_YEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WEEKOFYEAR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.WeekOfYear'>>, 'WHEN': <bound method Func.from_arg_list of <class 'sqlglot.expressions.When'>>, 'X_M_L_TABLE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.XMLTable'>>, 'XOR': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Xor'>>, 'YEAR': <function Hive.Parser.<lambda>>, 'GLOB': <function Parser.<lambda>>, 'JSON_EXTRACT_PATH_TEXT': <function build_extract_json_with_path.<locals>._builder>, 'LIKE': <function build_like>, 'BASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ToBase64'>>, 'COLLECT_LIST': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayAgg'>>, 'COLLECT_SET': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArrayUniqueAgg'>>, 'DATE_FORMAT': <function Hive.Parser.<lambda>>, 'FROM_UNIXTIME': <function build_formatted_time.<locals>._builder>, 'GET_JSON_OBJECT': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONExtractScalar'>>, 'LOCATE': <function locate_to_strposition>, 'PERCENTILE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.Quantile'>>, 'PERCENTILE_APPROX': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ApproxQuantile'>>, 'SIZE': <bound method Func.from_arg_list of <class 'sqlglot.expressions.ArraySize'>>, 'TO_DATE': <function build_formatted_time.<locals>._builder>, 'TO_JSON': <bound method Func.from_arg_list of <class 'sqlglot.expressions.JSONFormat'>>, 'UNBASE64': <bound method Func.from_arg_list of <class 'sqlglot.expressions.FromBase64'>>, 'UNIX_TIMESTAMP': <function build_formatted_time.<locals>._builder>}
NO_PAREN_FUNCTION_PARSERS = {'ANY': <function Parser.<lambda>>, 'CASE': <function Parser.<lambda>>, 'IF': <function Parser.<lambda>>, 'NEXT': <function Parser.<lambda>>, 'TRANSFORM': <function Hive.Parser.<lambda>>}
PROPERTY_PARSERS = {'ALGORITHM': <function Parser.<lambda>>, 'AUTO': <function Parser.<lambda>>, 'AUTO_INCREMENT': <function Parser.<lambda>>, 'BLOCKCOMPRESSION': <function Parser.<lambda>>, 'CHARSET': <function Parser.<lambda>>, 'CHARACTER SET': <function Parser.<lambda>>, 'CHECKSUM': <function Parser.<lambda>>, 'CLUSTER BY': <function Parser.<lambda>>, 'CLUSTERED': <function Parser.<lambda>>, 'COLLATE': <function Parser.<lambda>>, 'COMMENT': <function Parser.<lambda>>, 'CONTAINS': <function Parser.<lambda>>, 'COPY': <function Parser.<lambda>>, 'DATABLOCKSIZE': <function Parser.<lambda>>, 'DEFINER': <function Parser.<lambda>>, 'DETERMINISTIC': <function Parser.<lambda>>, 'DISTKEY': <function Parser.<lambda>>, 'DISTSTYLE': <function Parser.<lambda>>, 'ENGINE': <function Parser.<lambda>>, 'EXECUTE': <function Parser.<lambda>>, 'EXTERNAL': <function Parser.<lambda>>, 'FALLBACK': <function Parser.<lambda>>, 'FORMAT': <function Parser.<lambda>>, 'FREESPACE': <function Parser.<lambda>>, 'HEAP': <function Parser.<lambda>>, 'IMMUTABLE': <function Parser.<lambda>>, 'INHERITS': <function Parser.<lambda>>, 'INPUT': <function Parser.<lambda>>, 'JOURNAL': <function Parser.<lambda>>, 'LANGUAGE': <function Parser.<lambda>>, 'LAYOUT': <function Parser.<lambda>>, 'LIFETIME': <function Parser.<lambda>>, 'LIKE': <function Parser.<lambda>>, 'LOCATION': <function Parser.<lambda>>, 'LOCK': <function Parser.<lambda>>, 'LOCKING': <function Parser.<lambda>>, 'LOG': <function Parser.<lambda>>, 'MATERIALIZED': <function Parser.<lambda>>, 'MERGEBLOCKRATIO': <function Parser.<lambda>>, 'MODIFIES': <function Parser.<lambda>>, 'MULTISET': <function Parser.<lambda>>, 'NO': <function Parser.<lambda>>, 'ON': <function Parser.<lambda>>, 'ORDER BY': <function Parser.<lambda>>, 'OUTPUT': <function Parser.<lambda>>, 'PARTITION': <function Parser.<lambda>>, 'PARTITION BY': <function Parser.<lambda>>, 'PARTITIONED BY': <function Parser.<lambda>>, 'PARTITIONED_BY': <function Parser.<lambda>>, 'PRIMARY KEY': <function Parser.<lambda>>, 'RANGE': <function Parser.<lambda>>, 'READS': <function Parser.<lambda>>, 'REMOTE': <function Parser.<lambda>>, 'RETURNS': <function Parser.<lambda>>, 'ROW': <function Parser.<lambda>>, 'ROW_FORMAT': <function Parser.<lambda>>, 'SAMPLE': <function Parser.<lambda>>, 'SET': <function Parser.<lambda>>, 'SETTINGS': <function Parser.<lambda>>, 'SORTKEY': <function Parser.<lambda>>, 'SOURCE': <function Parser.<lambda>>, 'STABLE': <function Parser.<lambda>>, 'STORED': <function Parser.<lambda>>, 'SYSTEM_VERSIONING': <function Parser.<lambda>>, 'TBLPROPERTIES': <function Parser.<lambda>>, 'TEMP': <function Parser.<lambda>>, 'TEMPORARY': <function Parser.<lambda>>, 'TO': <function Parser.<lambda>>, 'TRANSIENT': <function Parser.<lambda>>, 'TRANSFORM': <function Parser.<lambda>>, 'TTL': <function Parser.<lambda>>, 'USING': <function Parser.<lambda>>, 'VOLATILE': <function Parser.<lambda>>, 'WITH': <function Parser.<lambda>>, 'WITH SERDEPROPERTIES': <function Hive.Parser.<lambda>>}
SHOW_TRIE: Dict = {}
SET_TRIE: Dict = {'GLOBAL': {0: True}, 'LOCAL': {0: True}, 'SESSION': {0: True}, 'TRANSACTION': {0: True}}
Inherited Members
sqlglot.parser.Parser
Parser
NO_PAREN_FUNCTIONS
STRUCT_TYPE_TOKENS
NESTED_TYPE_TOKENS
ENUM_TYPE_TOKENS
AGGREGATE_TYPE_TOKENS
TYPE_TOKENS
SIGNED_TO_UNSIGNED_TYPE_TOKEN
SUBQUERY_PREDICATES
RESERVED_TOKENS
DB_CREATABLES
CREATABLES
ID_VAR_TOKENS
INTERVAL_VARS
TABLE_ALIAS_TOKENS
COMMENT_TABLE_ALIAS_TOKENS
UPDATE_ALIAS_TOKENS
TRIM_TYPES
FUNC_TOKENS
CONJUNCTION
EQUALITY
COMPARISON
BITWISE
TERM
FACTOR
EXPONENT
TIMES
TIMESTAMPS
SET_OPERATIONS
JOIN_METHODS
JOIN_SIDES
JOIN_KINDS
JOIN_HINTS
LAMBDAS
COLUMN_OPERATORS
EXPRESSION_PARSERS
STATEMENT_PARSERS
UNARY_PARSERS
PRIMARY_PARSERS
PLACEHOLDER_PARSERS
RANGE_PARSERS
CONSTRAINT_PARSERS
ALTER_PARSERS
SCHEMA_UNNAMED_CONSTRAINTS
INVALID_FUNC_NAME_TOKENS
FUNCTIONS_WITH_ALIASED_ARGS
FUNCTION_PARSERS
QUERY_MODIFIER_PARSERS
SET_PARSERS
SHOW_PARSERS
TYPE_LITERAL_PARSERS
MODIFIABLES
DDL_SELECT_TOKENS
PRE_VOLATILE_TOKENS
TRANSACTION_KIND
TRANSACTION_CHARACTERISTICS
INSERT_ALTERNATIVES
CLONE_KEYWORDS
HISTORICAL_DATA_KIND
OPCLASS_FOLLOW_KEYWORDS
OPTYPE_FOLLOW_TOKENS
TABLE_INDEX_HINT_TOKENS
WINDOW_ALIAS_TOKENS
WINDOW_BEFORE_PAREN_TOKENS
WINDOW_SIDES
JSON_KEY_VALUE_SEPARATOR_TOKENS
FETCH_TOKENS
ADD_CONSTRAINT_TOKENS
DISTINCT_TOKENS
NULL_TOKENS
UNNEST_OFFSET_ALIAS_TOKENS
PREFIXED_PIVOT_COLUMNS
IDENTIFY_PIVOT_STRINGS
ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
TABLESAMPLE_CSV
SET_REQUIRES_ASSIGNMENT_DELIMITER
TRIM_PATTERN_FIRST
STRING_ALIASES
MODIFIERS_ATTACHED_TO_UNION
UNION_MODIFIERS
NO_PAREN_IF_COMMANDS
JSON_ARROWS_REQUIRE_JSON_TYPE
error_level
error_message_context
max_errors
dialect
reset
parse
parse_into
check_errors
raise_error
expression
validate_expression
errors
sql
class Hive.Generator(sqlglot.generator.Generator):
415    class Generator(generator.Generator):
416        LIMIT_FETCH = "LIMIT"
417        TABLESAMPLE_WITH_METHOD = False
418        JOIN_HINTS = False
419        TABLE_HINTS = False
420        QUERY_HINTS = False
421        INDEX_ON = "ON TABLE"
422        EXTRACT_ALLOWS_QUOTES = False
423        NVL2_SUPPORTED = False
424        LAST_DAY_SUPPORTS_DATE_PART = False
425        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
426
427        EXPRESSIONS_WITHOUT_NESTED_CTES = {
428            exp.Insert,
429            exp.Select,
430            exp.Subquery,
431            exp.Union,
432        }
433
434        SUPPORTED_JSON_PATH_PARTS = {
435            exp.JSONPathKey,
436            exp.JSONPathRoot,
437            exp.JSONPathSubscript,
438            exp.JSONPathWildcard,
439        }
440
441        TYPE_MAPPING = {
442            **generator.Generator.TYPE_MAPPING,
443            exp.DataType.Type.BIT: "BOOLEAN",
444            exp.DataType.Type.DATETIME: "TIMESTAMP",
445            exp.DataType.Type.TEXT: "STRING",
446            exp.DataType.Type.TIME: "TIMESTAMP",
447            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
448            exp.DataType.Type.VARBINARY: "BINARY",
449        }
450
451        TRANSFORMS = {
452            **generator.Generator.TRANSFORMS,
453            exp.Group: transforms.preprocess([transforms.unalias_group]),
454            exp.Select: transforms.preprocess(
455                [
456                    transforms.eliminate_qualify,
457                    transforms.eliminate_distinct_on,
458                    transforms.unnest_to_explode,
459                ]
460            ),
461            exp.Property: _property_sql,
462            exp.AnyValue: rename_func("FIRST"),
463            exp.ApproxDistinct: approx_count_distinct_sql,
464            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
465            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
466            exp.ArrayConcat: rename_func("CONCAT"),
467            exp.ArrayJoin: lambda self, e: self.func("CONCAT_WS", e.expression, e.this),
468            exp.ArraySize: rename_func("SIZE"),
469            exp.ArraySort: _array_sort_sql,
470            exp.With: no_recursive_cte_sql,
471            exp.DateAdd: _add_date_sql,
472            exp.DateDiff: _date_diff_sql,
473            exp.DateStrToDate: datestrtodate_sql,
474            exp.DateSub: _add_date_sql,
475            exp.DateToDi: lambda self,
476            e: f"CAST(DATE_FORMAT({self.sql(e, 'this')}, {Hive.DATEINT_FORMAT}) AS INT)",
477            exp.DiToDate: lambda self,
478            e: f"TO_DATE(CAST({self.sql(e, 'this')} AS STRING), {Hive.DATEINT_FORMAT})",
479            exp.FileFormatProperty: lambda self,
480            e: f"STORED AS {self.sql(e, 'this') if isinstance(e.this, exp.InputOutputFormat) else e.name.upper()}",
481            exp.FromBase64: rename_func("UNBASE64"),
482            exp.If: if_sql(),
483            exp.ILike: no_ilike_sql,
484            exp.IsNan: rename_func("ISNAN"),
485            exp.JSONExtract: lambda self, e: self.func("GET_JSON_OBJECT", e.this, e.expression),
486            exp.JSONExtractScalar: lambda self, e: self.func(
487                "GET_JSON_OBJECT", e.this, e.expression
488            ),
489            exp.JSONFormat: _json_format_sql,
490            exp.Left: left_to_substring_sql,
491            exp.Map: var_map_sql,
492            exp.Max: max_or_greatest,
493            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
494            exp.Min: min_or_least,
495            exp.MonthsBetween: lambda self, e: self.func("MONTHS_BETWEEN", e.this, e.expression),
496            exp.NotNullColumnConstraint: lambda _, e: (
497                "" if e.args.get("allow_null") else "NOT NULL"
498            ),
499            exp.VarMap: var_map_sql,
500            exp.Create: preprocess(
501                [
502                    remove_unique_constraints,
503                    ctas_with_tmp_tables_to_create_tmp_view,
504                    move_schema_columns_to_partitioned_by,
505                ]
506            ),
507            exp.Quantile: rename_func("PERCENTILE"),
508            exp.ApproxQuantile: rename_func("PERCENTILE_APPROX"),
509            exp.RegexpExtract: regexp_extract_sql,
510            exp.RegexpReplace: regexp_replace_sql,
511            exp.RegexpLike: lambda self, e: self.binary(e, "RLIKE"),
512            exp.RegexpSplit: rename_func("SPLIT"),
513            exp.Right: right_to_substring_sql,
514            exp.SafeDivide: no_safe_divide_sql,
515            exp.SchemaCommentProperty: lambda self, e: self.naked_property(e),
516            exp.ArrayUniqueAgg: rename_func("COLLECT_SET"),
517            exp.Split: lambda self, e: self.func(
518                "SPLIT", e.this, self.func("CONCAT", "'\\\\Q'", e.expression)
519            ),
520            exp.StrPosition: strposition_to_locate_sql,
521            exp.StrToDate: _str_to_date_sql,
522            exp.StrToTime: _str_to_time_sql,
523            exp.StrToUnix: _str_to_unix_sql,
524            exp.StructExtract: struct_extract_sql,
525            exp.TimeStrToDate: rename_func("TO_DATE"),
526            exp.TimeStrToTime: timestrtotime_sql,
527            exp.TimeStrToUnix: rename_func("UNIX_TIMESTAMP"),
528            exp.TimeToStr: lambda self, e: self.func("DATE_FORMAT", e.this, self.format_time(e)),
529            exp.TimeToUnix: rename_func("UNIX_TIMESTAMP"),
530            exp.ToBase64: rename_func("BASE64"),
531            exp.TsOrDiToDi: lambda self,
532            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS STRING), '-', ''), 1, 8) AS INT)",
533            exp.TsOrDsAdd: _add_date_sql,
534            exp.TsOrDsDiff: _date_diff_sql,
535            exp.TsOrDsToDate: _to_date_sql,
536            exp.TryCast: no_trycast_sql,
537            exp.UnixToStr: lambda self, e: self.func(
538                "FROM_UNIXTIME", e.this, time_format("hive")(self, e)
539            ),
540            exp.UnixToTime: rename_func("FROM_UNIXTIME"),
541            exp.UnixToTimeStr: rename_func("FROM_UNIXTIME"),
542            exp.PartitionedByProperty: lambda self, e: f"PARTITIONED BY {self.sql(e, 'this')}",
543            exp.SerdeProperties: lambda self, e: self.properties(e, prefix="WITH SERDEPROPERTIES"),
544            exp.NumberToStr: rename_func("FORMAT_NUMBER"),
545            exp.National: lambda self, e: self.national_sql(e, prefix=""),
546            exp.ClusteredColumnConstraint: lambda self,
547            e: f"({self.expressions(e, 'this', indent=False)})",
548            exp.NonClusteredColumnConstraint: lambda self,
549            e: f"({self.expressions(e, 'this', indent=False)})",
550            exp.NotForReplicationColumnConstraint: lambda *_: "",
551            exp.OnProperty: lambda *_: "",
552            exp.PrimaryKeyColumnConstraint: lambda *_: "PRIMARY KEY",
553        }
554
555        PROPERTIES_LOCATION = {
556            **generator.Generator.PROPERTIES_LOCATION,
557            exp.FileFormatProperty: exp.Properties.Location.POST_SCHEMA,
558            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
559            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
560            exp.WithDataProperty: exp.Properties.Location.UNSUPPORTED,
561        }
562
563        def _jsonpathkey_sql(self, expression: exp.JSONPathKey) -> str:
564            if isinstance(expression.this, exp.JSONPathWildcard):
565                self.unsupported("Unsupported wildcard in JSONPathKey expression")
566                return ""
567
568            return super()._jsonpathkey_sql(expression)
569
570        def parameter_sql(self, expression: exp.Parameter) -> str:
571            this = self.sql(expression, "this")
572            expression_sql = self.sql(expression, "expression")
573
574            parent = expression.parent
575            this = f"{this}:{expression_sql}" if expression_sql else this
576
577            if isinstance(parent, exp.EQ) and isinstance(parent.parent, exp.SetItem):
578                # We need to produce SET key = value instead of SET ${key} = value
579                return this
580
581            return f"${{{this}}}"
582
583        def schema_sql(self, expression: exp.Schema) -> str:
584            for ordered in expression.find_all(exp.Ordered):
585                if ordered.args.get("desc") is False:
586                    ordered.set("desc", None)
587
588            return super().schema_sql(expression)
589
590        def constraint_sql(self, expression: exp.Constraint) -> str:
591            for prop in list(expression.find_all(exp.Properties)):
592                prop.pop()
593
594            this = self.sql(expression, "this")
595            expressions = self.expressions(expression, sep=" ", flat=True)
596            return f"CONSTRAINT {this} {expressions}"
597
598        def rowformatserdeproperty_sql(self, expression: exp.RowFormatSerdeProperty) -> str:
599            serde_props = self.sql(expression, "serde_properties")
600            serde_props = f" {serde_props}" if serde_props else ""
601            return f"ROW FORMAT SERDE {self.sql(expression, 'this')}{serde_props}"
602
603        def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
604            return self.func(
605                "COLLECT_LIST",
606                expression.this.this if isinstance(expression.this, exp.Order) else expression.this,
607            )
608
609        def with_properties(self, properties: exp.Properties) -> str:
610            return self.properties(properties, prefix=self.seg("TBLPROPERTIES"))
611
612        def datatype_sql(self, expression: exp.DataType) -> str:
613            if (
614                expression.this in (exp.DataType.Type.VARCHAR, exp.DataType.Type.NVARCHAR)
615                and not expression.expressions
616            ):
617                expression = exp.DataType.build("text")
618            elif expression.is_type(exp.DataType.Type.TEXT) and expression.expressions:
619                expression.set("this", exp.DataType.Type.VARCHAR)
620            elif expression.this in exp.DataType.TEMPORAL_TYPES:
621                expression = exp.DataType.build(expression.this)
622            elif expression.is_type("float"):
623                size_expression = expression.find(exp.DataTypeParam)
624                if size_expression:
625                    size = int(size_expression.name)
626                    expression = (
627                        exp.DataType.build("float") if size <= 32 else exp.DataType.build("double")
628                    )
629
630            return super().datatype_sql(expression)
631
632        def version_sql(self, expression: exp.Version) -> str:
633            sql = super().version_sql(expression)
634            return sql.replace("FOR ", "", 1)

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are:
      False (default): Never quote, except in cases where it's mandatory by the dialect.
      True or 'always': Always quote.
      'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether to normalize identifiers to lowercase. Default: False.
  • pad: The pad size in a formatted string. Default: 2.
  • indent: The indentation size in a formatted string. Default: 2.
  • normalize_functions: How to normalize function names. Possible values are:
      "upper" or True (default): Convert names to uppercase.
      "lower": Convert names to lowercase.
      False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether to preserve comments in the output SQL code. Default: True
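
For orientation, here is a minimal sketch of how these options are typically supplied: keyword arguments such as pretty and identify passed to sqlglot.transpile are forwarded to this generator (the query and the option values below are illustrative).

    import sqlglot

    # Illustrative query; pretty=True formats the output and identify=True
    # quotes every identifier with Hive backticks.
    sql = "select col_a, count(*) from my_table group by col_a"
    print(sqlglot.transpile(sql, write="hive", pretty=True, identify=True)[0])
    # Roughly:
    # SELECT
    #   `col_a`,
    #   COUNT(*)
    # FROM `my_table`
    # GROUP BY
    #   `col_a`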
LIMIT_FETCH = 'LIMIT'
TABLESAMPLE_WITH_METHOD = False
JOIN_HINTS = False
TABLE_HINTS = False
QUERY_HINTS = False
INDEX_ON = 'ON TABLE'
EXTRACT_ALLOWS_QUOTES = False
NVL2_SUPPORTED = False
LAST_DAY_SUPPORTS_DATE_PART = False
JSON_PATH_SINGLE_QUOTE_ESCAPE = True
EXPRESSIONS_WITHOUT_NESTED_CTES = {exp.Insert, exp.Select, exp.Subquery, exp.Union}
TYPE_MAPPING = {Type.NCHAR: 'CHAR', Type.NVARCHAR: 'VARCHAR', Type.MEDIUMTEXT: 'TEXT', Type.LONGTEXT: 'TEXT', Type.TINYTEXT: 'TEXT', Type.MEDIUMBLOB: 'BLOB', Type.LONGBLOB: 'BLOB', Type.TINYBLOB: 'BLOB', Type.INET: 'INET', Type.BIT: 'BOOLEAN', Type.DATETIME: 'TIMESTAMP', Type.TEXT: 'STRING', Type.TIME: 'TIMESTAMP', Type.TIMESTAMPTZ: 'TIMESTAMP', Type.VARBINARY: 'BINARY'}
TRANSFORMS = {<class 'sqlglot.expressions.JSONPathKey'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathRoot'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function <lambda>>, <class 'sqlglot.expressions.JSONPathWildcard'>: <function <lambda>>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CaseSpecificColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CharacterSetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.CollateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CommentColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DateAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateFormatColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.DefaultColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.EncodeColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ExternalProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.HeapProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InheritsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InlineLengthColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.InputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.IntervalSpan'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtract'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.LanguageProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LocationProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.LogProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.MaterializedProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NonClusteredColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.NotForReplicationColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnCommitProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OnProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.OnUpdateColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.OutputModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.PathColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ReturnsProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SampleProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetConfigProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SetProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SettingsProperty'>: <function Generator.<lambda>>, <class 
'sqlglot.expressions.SqlReadWriteProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.StabilityProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TemporaryProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TitleColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Timestamp'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.ToTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransformModelProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.TransientProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.UppercaseColumnConstraint'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.VarMap'>: <function var_map_sql>, <class 'sqlglot.expressions.VolatileProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <function Generator.<lambda>>, <class 'sqlglot.expressions.Group'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Select'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Property'>: <function _property_sql>, <class 'sqlglot.expressions.AnyValue'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxDistinct'>: <function approx_count_distinct_sql>, <class 'sqlglot.expressions.ArgMax'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArgMin'>: <function arg_max_or_min_no_count.<locals>._arg_max_or_min_sql>, <class 'sqlglot.expressions.ArrayConcat'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArrayJoin'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArraySize'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ArraySort'>: <function _array_sort_sql>, <class 'sqlglot.expressions.With'>: <function no_recursive_cte_sql>, <class 'sqlglot.expressions.DateDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.DateStrToDate'>: <function datestrtodate_sql>, <class 'sqlglot.expressions.DateSub'>: <function _add_date_sql>, <class 'sqlglot.expressions.DateToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.DiToDate'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FileFormatProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.FromBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.If'>: <function if_sql.<locals>._if_sql>, <class 'sqlglot.expressions.ILike'>: <function no_ilike_sql>, <class 'sqlglot.expressions.IsNan'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.JSONFormat'>: <function _json_format_sql>, <class 'sqlglot.expressions.Left'>: <function left_to_substring_sql>, <class 'sqlglot.expressions.Map'>: <function var_map_sql>, <class 'sqlglot.expressions.Max'>: <function max_or_greatest>, <class 'sqlglot.expressions.MD5Digest'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Min'>: <function min_or_least>, <class 'sqlglot.expressions.MonthsBetween'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NotNullColumnConstraint'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.Create'>: <function preprocess.<locals>._to_sql>, <class 'sqlglot.expressions.Quantile'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ApproxQuantile'>: <function 
rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.RegexpExtract'>: <function regexp_extract_sql>, <class 'sqlglot.expressions.RegexpReplace'>: <function regexp_replace_sql>, <class 'sqlglot.expressions.RegexpLike'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.RegexpSplit'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Right'>: <function right_to_substring_sql>, <class 'sqlglot.expressions.SafeDivide'>: <function no_safe_divide_sql>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.Split'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.StrPosition'>: <function strposition_to_locate_sql>, <class 'sqlglot.expressions.StrToDate'>: <function _str_to_date_sql>, <class 'sqlglot.expressions.StrToTime'>: <function _str_to_time_sql>, <class 'sqlglot.expressions.StrToUnix'>: <function _str_to_unix_sql>, <class 'sqlglot.expressions.StructExtract'>: <function struct_extract_sql>, <class 'sqlglot.expressions.TimeStrToDate'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeStrToTime'>: <function timestrtotime_sql>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TimeToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TimeToUnix'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.ToBase64'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _add_date_sql>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _date_diff_sql>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _to_date_sql>, <class 'sqlglot.expressions.TryCast'>: <function no_trycast_sql>, <class 'sqlglot.expressions.UnixToStr'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.UnixToTime'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.PartitionedByProperty'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.SerdeProperties'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.NumberToStr'>: <function rename_func.<locals>.<lambda>>, <class 'sqlglot.expressions.National'>: <function Hive.Generator.<lambda>>, <class 'sqlglot.expressions.PrimaryKeyColumnConstraint'>: <function Hive.Generator.<lambda>>}
PROPERTIES_LOCATION = {<class 'sqlglot.expressions.AlgorithmProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.AutoIncrementProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.AutoRefreshProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.BlockCompressionProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CharacterSetProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ChecksumProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.CollateProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.CopyGrantsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Cluster'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ClusteredByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DataBlocksizeProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.DefinerProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.DictRange'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DictProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.DistStyleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.EngineProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExecuteAsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ExternalProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.FallbackProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.FileFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.FreespaceProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.HeapProperty'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.InheritsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.InputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.IsolatedLoadingProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.JournalProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.LanguageProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LikeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LocationProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.LockingProperty'>: <Location.POST_ALIAS: 'POST_ALIAS'>, <class 'sqlglot.expressions.LogProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.MaterializedProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.MergeBlockRatioProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.NoPrimaryIndexProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.OnProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OnCommitProperty'>: <Location.POST_EXPRESSION: 'POST_EXPRESSION'>, <class 'sqlglot.expressions.Order'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.OutputModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedByProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PartitionedOfProperty'>: 
<Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.PrimaryKey'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Property'>: <Location.POST_WITH: 'POST_WITH'>, <class 'sqlglot.expressions.RemoteWithConnectionModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.ReturnsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatDelimitedProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.RowFormatSerdeProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SampleProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SchemaCommentProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SerdeProperties'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.Set'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SettingsProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SetProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.SetConfigProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SortKeyProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlReadWriteProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.SqlSecurityProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.StabilityProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TemporaryProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.ToTableProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.TransientProperty'>: <Location.POST_CREATE: 'POST_CREATE'>, <class 'sqlglot.expressions.TransformModelProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.MergeTreeTTL'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>, <class 'sqlglot.expressions.VolatileProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithDataProperty'>: <Location.UNSUPPORTED: 'UNSUPPORTED'>, <class 'sqlglot.expressions.WithJournalTableProperty'>: <Location.POST_NAME: 'POST_NAME'>, <class 'sqlglot.expressions.WithSystemVersioningProperty'>: <Location.POST_SCHEMA: 'POST_SCHEMA'>}
def parameter_sql(self, expression: sqlglot.expressions.Parameter) -> str:
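A rough illustration of parameter_sql, building the expression by hand (the parameter name is made up): a standalone parameter is rendered with Hive's ${...} substitution syntax, while a parameter on the left-hand side of a SET item is emitted bare, as the comment in the source above notes.

    from sqlglot import exp
    from sqlglot.dialects.hive import Hive

    # A hand-built Parameter node; outside of a SET item it is wrapped in ${...}.
    param = exp.Parameter(this=exp.var("env"))
    print(Hive().generate(param))  # ${env}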
def schema_sql(self, expression: sqlglot.expressions.Schema) -> str:
def constraint_sql(self, expression: sqlglot.expressions.Constraint) -> str:
def rowformatserdeproperty_sql(self, expression: sqlglot.expressions.RowFormatSerdeProperty) -> str:
def arrayagg_sql(self, expression: sqlglot.expressions.ArrayAgg) -> str:
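As a quick, illustrative check of arrayagg_sql (the query is made up): ARRAY_AGG is emitted as Hive's COLLECT_LIST, and any ORDER BY wrapped around the aggregated expression is dropped, since only the inner expression of the Order node is kept.

    import sqlglot

    # ARRAY_AGG in the default dialect becomes COLLECT_LIST in Hive.
    print(sqlglot.transpile("SELECT ARRAY_AGG(x) FROM t", write="hive")[0])
    # SELECT COLLECT_LIST(x) FROM t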
def with_properties(self, properties: sqlglot.expressions.Properties) -> str:
def datatype_sql(self, expression: sqlglot.expressions.DataType) -> str:
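An illustrative round-trip through datatype_sql and TYPE_MAPPING (the table definition is made up): TEXT maps to STRING, DATETIME collapses to TIMESTAMP, and a parameterized FLOAT is widened to DOUBLE once the declared size exceeds 32.

    import sqlglot

    # TEXT -> STRING, FLOAT(53) -> DOUBLE (size > 32), DATETIME -> TIMESTAMP.
    sql = "CREATE TABLE t (a TEXT, b FLOAT(53), c DATETIME)"
    print(sqlglot.transpile(sql, write="hive")[0])
    # CREATE TABLE t (a STRING, b DOUBLE, c TIMESTAMP)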
def version_sql(self, expression: sqlglot.expressions.Version) -> str:
SELECT_KINDS: Tuple[str, ...] = ()
Inherited Members
sqlglot.generator.Generator
Generator
NULL_ORDERING_SUPPORTED
IGNORE_NULLS_IN_FUNC
LOCKING_READS_SUPPORTED
EXPLICIT_UNION
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_ONLY_LITERALS
RENAME_TABLE_WITH_DB
GROUPINGS_SEP
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
COLUMN_JOIN_MARKS_SUPPORTED
TZ_TO_WITH_TIME_ZONE
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
SEMI_ANTI_JOIN_WITH_SIDE
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_KEYWORDS
TABLESAMPLE_SEED_KEYWORD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
SUPPORTS_TABLE_ALIAS_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
JSON_KEY_VALUE_PAIR_SEP
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
SUPPORTS_CREATE_TABLE_LIKE
LIKE_PROPERTY_INSIDE_SCHEMA
MULTI_ARG_DISTINCT
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_BRACKETED_KEY_SUPPORTED
CAN_IMPLEMENT_ARRAY_ANY
STAR_MAPPING
TIME_PART_SINGULARS
TOKEN_MAPPING
STRUCT_DELIMITER
PARAMETER_TOKEN
NAMED_PLACEHOLDER_TOKEN
RESERVED_KEYWORDS
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
UNWRAPPED_INTERVAL_VALUES
KEY_VALUE_DEFINITIONS
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
generate
preprocess
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
transformcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
createable_sql
create_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
index_sql
identifier_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
locate_properties
property_name
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_sql
tablesample_sql
pivot_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
connect_sql
prior_sql
join_sql
lambda_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognize_sql
query_modifiers
offset_limit_modifiers
after_having_modifiers
after_limit_modifiers
select_sql
schema_columns_sql
star_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
union_sql
union_op
unnest_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_sql
all_sql
any_sql
exists_sql
case_sql
nextvaluefor_sql
extract_sql
trim_sql
convert_concat_args
concat_sql
concatws_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
jsonobject_sql
jsonobjectagg_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
aliases_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
currenttimestamp_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
renametable_sql
renamecolumn_sql
altertable_sql
add_column_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
or_sql
slice_sql
sub_sql
trycast_sql
log_sql
use_sql
binary
function_fallback_sql
func
format_args
text_width
format_time
expressions
op_expressions
naked_property
set_operation
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql
tochar_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
checkcolumnconstraint_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
forin_sql
refresh_sql
operator_sql
toarray_sql
tsordstotime_sql
tsordstodate_sql
unixdate_sql
lastday_sql
arrayany_sql