sqlglot.dialects.duckdb
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.expressions import DATA_TYPE
from sqlglot.dialects.dialect import (
    Dialect,
    JSON_EXTRACT_TYPE,
    NormalizationStrategy,
    approx_count_distinct_sql,
    arg_max_or_min_no_count,
    arrow_json_extract_sql,
    binary_from_function,
    bool_xor_sql,
    build_default_decimal_type,
    date_trunc_to_time,
    datestrtodate_sql,
    no_datetime_sql,
    encode_decode_sql,
    build_formatted_time,
    inline_array_unless_query,
    no_comment_column_constraint_sql,
    no_safe_divide_sql,
    no_time_sql,
    no_timestamp_sql,
    pivot_column_names,
    regexp_extract_sql,
    rename_func,
    str_position_sql,
    str_to_time_sql,
    timestamptrunc_sql,
    timestrtotime_sql,
    unit_to_var,
    unit_to_str,
    sha256_sql,
    build_regexp_extract,
)
from sqlglot.helper import seq_get
from sqlglot.tokens import TokenType

DATETIME_DELTA = t.Union[
    exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd, exp.DateSub, exp.DatetimeSub
]

WINDOW_FUNCS_WITH_IGNORE_NULLS = (
    exp.FirstValue,
    exp.LastValue,
    exp.Lag,
    exp.Lead,
    exp.NthValue,
)


def _date_delta_sql(self: DuckDB.Generator, expression: DATETIME_DELTA) -> str:
    this = expression.this
    unit = unit_to_var(expression)
    op = (
        "+"
        if isinstance(expression, (exp.DateAdd, exp.TimeAdd, exp.DatetimeAdd, exp.TsOrDsAdd))
        else "-"
    )

    to_type: t.Optional[DATA_TYPE] = None
    if isinstance(expression, exp.TsOrDsAdd):
        to_type = expression.return_type
    elif this.is_string:
        # Cast string literals (i.e. function parameters) to the appropriate type for +/- interval to work
        to_type = (
            exp.DataType.Type.DATETIME
            if isinstance(expression, (exp.DatetimeAdd, exp.DatetimeSub))
            else exp.DataType.Type.DATE
        )

    this = exp.cast(this, to_type) if to_type else this

    return f"{self.sql(this)} {op} {self.sql(exp.Interval(this=expression.expression, unit=unit))}"


# BigQuery -> DuckDB conversion for the DATE function
def _date_sql(self: DuckDB.Generator, expression: exp.Date) -> str:
    result = f"CAST({self.sql(expression, 'this')} AS DATE)"
    zone = self.sql(expression, "zone")

    if zone:
        date_str = self.func("STRFTIME", result, "'%d/%m/%Y'")
        date_str = f"{date_str} || ' ' || {zone}"

        # This will create a TIMESTAMP with time zone information
        result = self.func("STRPTIME", date_str, "'%d/%m/%Y %Z'")

    return result


# BigQuery -> DuckDB conversion for the TIME_DIFF function
def _timediff_sql(self: DuckDB.Generator, expression: exp.TimeDiff) -> str:
    this = exp.cast(expression.this, exp.DataType.Type.TIME)
    expr = exp.cast(expression.expression, exp.DataType.Type.TIME)

    # Although the 2 dialects share similar signatures, BQ seems to inverse
    # the sign of the result so the start/end time operands are flipped
    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


@generator.unsupported_args(("expression", "DuckDB's ARRAY_SORT does not support a comparator."))
def _array_sort_sql(self: DuckDB.Generator, expression: exp.ArraySort) -> str:
    return self.func("ARRAY_SORT", expression.this)


def _sort_array_sql(self: DuckDB.Generator, expression: exp.SortArray) -> str:
    name = "ARRAY_REVERSE_SORT" if expression.args.get("asc") == exp.false() else "ARRAY_SORT"
    return self.func(name, expression.this)


def _build_sort_array_desc(args: t.List) -> exp.Expression:
    return exp.SortArray(this=seq_get(args, 0), asc=exp.false())


def _build_date_diff(args: t.List) -> exp.Expression:
    return exp.DateDiff(this=seq_get(args, 2), expression=seq_get(args, 1), unit=seq_get(args, 0))


def _build_generate_series(end_exclusive: bool = False) -> t.Callable[[t.List], exp.GenerateSeries]:
    def _builder(args: t.List) -> exp.GenerateSeries:
        # Check https://duckdb.org/docs/sql/functions/nested.html#range-functions
        if len(args) == 1:
            # DuckDB uses 0 as a default for the series' start when it's omitted
            args.insert(0, exp.Literal.number("0"))

        gen_series = exp.GenerateSeries.from_arg_list(args)
        gen_series.set("is_end_exclusive", end_exclusive)

        return gen_series

    return _builder


def _build_make_timestamp(args: t.List) -> exp.Expression:
    if len(args) == 1:
        return exp.UnixToTime(this=seq_get(args, 0), scale=exp.UnixToTime.MICROS)

    return exp.TimestampFromParts(
        year=seq_get(args, 0),
        month=seq_get(args, 1),
        day=seq_get(args, 2),
        hour=seq_get(args, 3),
        min=seq_get(args, 4),
        sec=seq_get(args, 5),
    )


def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
    args: t.List[str] = []

    # BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
    # canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
    # The transformation to ROW will take place if a cast to STRUCT / ARRAY of STRUCTs is found
    ancestor_cast = expression.find_ancestor(exp.Cast)
    is_struct_cast = ancestor_cast and any(
        casted_type.is_type(exp.DataType.Type.STRUCT)
        for casted_type in ancestor_cast.find_all(exp.DataType)
    )

    for i, expr in enumerate(expression.expressions):
        is_property_eq = isinstance(expr, exp.PropertyEQ)
        value = expr.expression if is_property_eq else expr

        if is_struct_cast:
            args.append(self.sql(value))
        else:
            key = expr.name if is_property_eq else f"_{i}"
            args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")

    csv_args = ", ".join(args)

    return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"


def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:
    if expression.is_type("array"):
        return f"{self.expressions(expression, flat=True)}[{self.expressions(expression, key='values', flat=True)}]"

    # Modifiers are not supported for TIME, [TIME | TIMESTAMP] WITH TIME ZONE
    if expression.is_type(
        exp.DataType.Type.TIME, exp.DataType.Type.TIMETZ, exp.DataType.Type.TIMESTAMPTZ
    ):
        return expression.this.value

    return self.datatype_sql(expression)


def _json_format_sql(self: DuckDB.Generator, expression: exp.JSONFormat) -> str:
    sql = self.func("TO_JSON", expression.this, expression.args.get("options"))
    return f"CAST({sql} AS TEXT)"


def _unix_to_time_sql(self: DuckDB.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TO_TIMESTAMP", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("EPOCH_MS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("MAKE_TIMESTAMP", timestamp)

    return self.func("TO_TIMESTAMP", exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)))


WRAPPED_JSON_EXTRACT_EXPRESSIONS = (exp.Binary, exp.Bracket, exp.In)


def _arrow_json_extract_sql(self: DuckDB.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    arrow_sql = arrow_json_extract_sql(self, expression)
    if not expression.same_parent and isinstance(
        expression.parent, WRAPPED_JSON_EXTRACT_EXPRESSIONS
    ):
        arrow_sql = self.wrap(arrow_sql)
    return arrow_sql


def _implicit_datetime_cast(
    arg: t.Optional[exp.Expression], type: exp.DataType.Type = exp.DataType.Type.DATE
) -> t.Optional[exp.Expression]:
    return exp.cast(arg, type) if isinstance(arg, exp.Literal) else arg


def _date_diff_sql(self: DuckDB.Generator, expression: exp.DateDiff) -> str:
    this = _implicit_datetime_cast(expression.this)
    expr = _implicit_datetime_cast(expression.expression)

    return self.func("DATE_DIFF", unit_to_str(expression), expr, this)


def _generate_datetime_array_sql(
    self: DuckDB.Generator, expression: t.Union[exp.GenerateDateArray, exp.GenerateTimestampArray]
) -> str:
    is_generate_date_array = isinstance(expression, exp.GenerateDateArray)

    type = exp.DataType.Type.DATE if is_generate_date_array else exp.DataType.Type.TIMESTAMP
    start = _implicit_datetime_cast(expression.args.get("start"), type=type)
    end = _implicit_datetime_cast(expression.args.get("end"), type=type)

    # BQ's GENERATE_DATE_ARRAY & GENERATE_TIMESTAMP_ARRAY are transformed to DuckDB's GENERATE_SERIES
    gen_series: t.Union[exp.GenerateSeries, exp.Cast] = exp.GenerateSeries(
        start=start, end=end, step=expression.args.get("step")
    )

    if is_generate_date_array:
        # The GENERATE_SERIES result type is TIMESTAMP array, so to match BQ's semantics for
        # GENERATE_DATE_ARRAY we must cast it back to DATE array
        gen_series = exp.cast(gen_series, exp.DataType.build("ARRAY<DATE>"))

    return self.sql(gen_series)


class DuckDB(Dialect):
    NULL_ORDERING = "nulls_are_last"
    SUPPORTS_USER_DEFINED_TYPES = False
    SAFE_DIVISION = True
    INDEX_OFFSET = 1
    CONCAT_COALESCE = True
    SUPPORTS_ORDER_BY_ALL = True
    SUPPORTS_FIXED_SIZE_ARRAYS = True
    STRICT_JSON_PATH_SYNTAX = False

    # https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if isinstance(path, exp.Literal):
            # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`.
            # Additionally, it allows accessing the back of lists using the `[#-i]` syntax.
            # This check ensures we'll avoid trying to parse these as JSON paths, which can
            # either result in a noisy warning or in an invalid representation of the path.
            path_text = path.name
            if path_text.startswith("/") or "[#" in path_text:
                return path

        return super().to_json_path(path)

    class Tokenizer(tokens.Tokenizer):
        HEREDOC_STRINGS = ["$"]

        HEREDOC_TAG_IS_IDENTIFIER = True
        HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "//": TokenType.DIV,
            "**": TokenType.DSTAR,
            "ATTACH": TokenType.COMMAND,
            "BINARY": TokenType.VARBINARY,
            "BITSTRING": TokenType.BIT,
            "BPCHAR": TokenType.TEXT,
            "CHAR": TokenType.TEXT,
            "CHARACTER VARYING": TokenType.TEXT,
            "EXCLUDE": TokenType.EXCEPT,
            "LOGICAL": TokenType.BOOLEAN,
            "ONLY": TokenType.ONLY,
            "PIVOT_WIDER": TokenType.PIVOT,
            "POSITIONAL": TokenType.POSITIONAL,
            "SIGNED": TokenType.INT,
            "STRING": TokenType.TEXT,
            "SUMMARIZE": TokenType.SUMMARIZE,
            "TIMESTAMP_S": TokenType.TIMESTAMP_S,
            "TIMESTAMP_MS": TokenType.TIMESTAMP_MS,
            "TIMESTAMP_NS": TokenType.TIMESTAMP_NS,
            "TIMESTAMP_US": TokenType.TIMESTAMP,
            "UBIGINT": TokenType.UBIGINT,
            "UINTEGER": TokenType.UINT,
            "USMALLINT": TokenType.USMALLINT,
            "UTINYINT": TokenType.UTINYINT,
            "VARCHAR": TokenType.TEXT,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

    class Parser(parser.Parser):
        BITWISE = {
            **parser.Parser.BITWISE,
            TokenType.TILDA: exp.RegexpLike,
        }
        BITWISE.pop(TokenType.CARET)

        EXPONENT = {
            **parser.Parser.EXPONENT,
            TokenType.CARET: exp.Pow,
            TokenType.DSTAR: exp.Pow,
        }

        FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "ARRAY_REVERSE_SORT": _build_sort_array_desc,
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "DATEDIFF": _build_date_diff,
            "DATE_DIFF": _build_date_diff,
            "DATE_TRUNC": date_trunc_to_time,
            "DATETRUNC": date_trunc_to_time,
            "DECODE": lambda args: exp.Decode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "ENCODE": lambda args: exp.Encode(
                this=seq_get(args, 0), charset=exp.Literal.string("utf-8")
            ),
            "EPOCH": exp.TimeToUnix.from_arg_list,
            "EPOCH_MS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "JSON": exp.ParseJSON.from_arg_list,
            "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
            "LIST_HAS": exp.ArrayContains.from_arg_list,
            "LIST_REVERSE_SORT": _build_sort_array_desc,
            "LIST_SORT": exp.SortArray.from_arg_list,
            "LIST_VALUE": lambda args: exp.Array(expressions=args),
            "MAKE_TIME": exp.TimeFromParts.from_arg_list,
            "MAKE_TIMESTAMP": _build_make_timestamp,
            "MEDIAN": lambda args: exp.PercentileCont(
                this=seq_get(args, 0), expression=exp.Literal.number(0.5)
            ),
            "QUANTILE_CONT": exp.PercentileCont.from_arg_list,
            "QUANTILE_DISC": exp.PercentileDisc.from_arg_list,
            "REGEXP_EXTRACT": build_regexp_extract,
            "REGEXP_MATCHES": exp.RegexpLike.from_arg_list,
            "REGEXP_REPLACE": lambda args: exp.RegexpReplace(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                replacement=seq_get(args, 2),
                modifiers=seq_get(args, 3),
            ),
            "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"),
            "STRING_SPLIT": exp.Split.from_arg_list,
            "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "STRING_TO_ARRAY": exp.Split.from_arg_list,
            "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"),
            "STRUCT_PACK": exp.Struct.from_arg_list,
            "STR_SPLIT": exp.Split.from_arg_list,
            "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list,
            "TO_TIMESTAMP": exp.UnixToTime.from_arg_list,
            "UNNEST": exp.Explode.from_arg_list,
            "XOR": binary_from_function(exp.BitwiseXor),
            "GENERATE_SERIES": _build_generate_series(),
            "RANGE": _build_generate_series(end_exclusive=True),
        }

        FUNCTIONS.pop("DATE_SUB")
        FUNCTIONS.pop("GLOB")

        FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy()
        FUNCTION_PARSERS.pop("DECODE")

        NO_PAREN_FUNCTION_PARSERS = {
            **parser.Parser.NO_PAREN_FUNCTION_PARSERS,
            "MAP": lambda self: self._parse_map(),
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {
            TokenType.SEMI,
            TokenType.ANTI,
        }

        PLACEHOLDER_PARSERS = {
            **parser.Parser.PLACEHOLDER_PARSERS,
            TokenType.PARAMETER: lambda self: (
                self.expression(exp.Placeholder, this=self._prev.text)
                if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS)
                else None
            ),
        }

        TYPE_CONVERTERS = {
            # https://duckdb.org/docs/sql/data_types/numeric
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, scale=3),
            # https://duckdb.org/docs/sql/data_types/text
            exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"),
        }

        def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
            # https://duckdb.org/docs/sql/samples.html
            sample = super()._parse_table_sample(as_modifier=as_modifier)
            if sample and not sample.args.get("method"):
                if sample.args.get("size"):
                    sample.set("method", exp.var("RESERVOIR"))
                else:
                    sample.set("method", exp.var("SYSTEM"))

            return sample

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)
            if isinstance(bracket, exp.Bracket):
                bracket.set("returns_list_for_maps", True)

            return bracket

        def _parse_map(self) -> exp.ToMap | exp.Map:
            if self._match(TokenType.L_BRACE, advance=False):
                return self.expression(exp.ToMap, this=self._parse_bracket())

            args = self._parse_wrapped_csv(self._parse_assignment)
            return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1))

        def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
            return self._parse_field_def()

        def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
            if len(aggregations) == 1:
                return super()._pivot_column_names(aggregations)
            return pivot_column_names(aggregations, dialect="duckdb")

    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        NAMED_PLACEHOLDER_TOKEN = "$"
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        LIMIT_FETCH = "LIMIT"
        STRUCT_DELIMITER = ("(", ")")
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        SEMI_ANTI_JOIN_WITH_SIDE = False
        TABLESAMPLE_KEYWORDS = "USING SAMPLE"
        TABLESAMPLE_SEED_KEYWORD = "REPEATABLE"
        LAST_DAY_SUPPORTS_DATE_PART = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_BRACKETED_KEY_SUPPORTED = False
        SUPPORTS_CREATE_TABLE_LIKE = False
        MULTI_ARG_DISTINCT = False
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        COPY_HAS_INTO_KEYWORD = False
        STAR_EXCEPT = "EXCLUDE"
        PAD_FILL_PATTERN_IS_REQUIRED = True
        ARRAY_CONCAT_IS_VAR_LEN = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: approx_count_distinct_sql,
            exp.Array: inline_array_unless_query,
            exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"),
            exp.ArrayFilter: rename_func("LIST_FILTER"),
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"),
            exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"),
            exp.ArraySort: _array_sort_sql,
            exp.ArraySum: rename_func("LIST_SUM"),
            exp.BitwiseXor: rename_func("XOR"),
            exp.CommentColumnConstraint: no_comment_column_constraint_sql,
            exp.CurrentDate: lambda *_: "CURRENT_DATE",
            exp.CurrentTime: lambda *_: "CURRENT_TIME",
            exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP",
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("ISODOW"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.DataType: _datatype_sql,
            exp.Date: _date_sql,
            exp.DateAdd: _date_delta_sql,
            exp.DateFromParts: rename_func("MAKE_DATE"),
            exp.DateSub: _date_delta_sql,
            exp.DateDiff: _date_diff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.Datetime: no_datetime_sql,
            exp.DatetimeSub: _date_delta_sql,
            exp.DatetimeAdd: _date_delta_sql,
            exp.DateToDi: lambda self,
            e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)",
            exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False),
            exp.DiToDate: lambda self,
            e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)",
            exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False),
            exp.GenerateDateArray: _generate_datetime_array_sql,
            exp.GenerateTimestampArray: _generate_datetime_array_sql,
            exp.Explode: rename_func("UNNEST"),
            exp.IntDiv: lambda self, e: self.binary(e, "//"),
            exp.IsInf: rename_func("ISINF"),
            exp.IsNan: rename_func("ISNAN"),
            exp.JSONExtract: _arrow_json_extract_sql,
            exp.JSONExtractScalar: _arrow_json_extract_sql,
            exp.JSONFormat: _json_format_sql,
            exp.LogicalOr: rename_func("BOOL_OR"),
            exp.LogicalAnd: rename_func("BOOL_AND"),
            exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)),
            exp.MonthsBetween: lambda self, e: self.func(
                "DATEDIFF",
                "'month'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True),
            ),
            exp.PercentileCont: rename_func("QUANTILE_CONT"),
            exp.PercentileDisc: rename_func("QUANTILE_DISC"),
            # DuckDB doesn't allow qualified columns inside of PIVOT expressions.
            # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62
            exp.Pivot: transforms.preprocess([transforms.unqualify_columns]),
            exp.RegexpExtract: regexp_extract_sql,
            exp.RegexpReplace: lambda self, e: self.func(
                "REGEXP_REPLACE",
                e.this,
                e.expression,
                e.args.get("replacement"),
                e.args.get("modifiers"),
            ),
            exp.RegexpLike: rename_func("REGEXP_MATCHES"),
            exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"),
            exp.Return: lambda self, e: self.sql(e, "this"),
            exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "",
            exp.Rand: rename_func("RANDOM"),
            exp.SafeDivide: no_safe_divide_sql,
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Split: rename_func("STR_SPLIT"),
            exp.SortArray: _sort_array_sql,
            exp.StrPosition: str_position_sql,
            exp.StrToUnix: lambda self, e: self.func(
                "EPOCH", self.func("STRPTIME", e.this, self.format_time(e))
            ),
            exp.Struct: _struct_sql,
            exp.Transform: rename_func("LIST_TRANSFORM"),
            exp.TimeAdd: _date_delta_sql,
            exp.Time: no_time_sql,
            exp.TimeDiff: _timediff_sql,
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampDiff: lambda self, e: self.func(
                "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeStrToUnix: lambda self, e: self.func(
                "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP)
            ),
            exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)),
            exp.TimeToUnix: rename_func("EPOCH"),
            exp.TsOrDiToDi: lambda self,
            e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)",
            exp.TsOrDsAdd: _date_delta_sql,
            exp.TsOrDsDiff: lambda self, e: self.func(
                "DATE_DIFF",
                f"'{e.args.get('unit') or 'DAY'}'",
                exp.cast(e.expression, exp.DataType.Type.TIMESTAMP),
                exp.cast(e.this, exp.DataType.Type.TIMESTAMP),
            ),
            exp.UnixToStr: lambda self, e: self.func(
                "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e)
            ),
            exp.DatetimeTrunc: lambda self, e: self.func(
                "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME)
            ),
            exp.UnixToTime: _unix_to_time_sql,
            exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)",
            exp.VariancePop: rename_func("VAR_POP"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: bool_xor_sql,
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
            exp.JSONPathWildcard,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BINARY: "BLOB",
            exp.DataType.Type.BPCHAR: "TEXT",
            exp.DataType.Type.CHAR: "TEXT",
            exp.DataType.Type.FLOAT: "REAL",
            exp.DataType.Type.NCHAR: "TEXT",
            exp.DataType.Type.NVARCHAR: "TEXT",
            exp.DataType.Type.UINT: "UINTEGER",
            exp.DataType.Type.VARBINARY: "BLOB",
            exp.DataType.Type.ROWVERSION: "BLOB",
            exp.DataType.Type.VARCHAR: "TEXT",
            exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S",
            exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS",
            exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS",
        }

        # https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77
        RESERVED_KEYWORDS = {
            "array",
            "analyse",
            "union",
            "all",
            "when",
            "in_p",
            "default",
            "create_p",
            "window",
            "asymmetric",
            "to",
            "else",
            "localtime",
            "from",
            "end_p",
            "select",
            "current_date",
            "foreign",
            "with",
            "grant",
            "session_user",
            "or",
            "except",
            "references",
            "fetch",
            "limit",
            "group_p",
            "leading",
            "into",
            "collate",
            "offset",
            "do",
            "then",
            "localtimestamp",
            "check_p",
            "lateral_p",
            "current_role",
            "where",
            "asc_p",
            "placing",
            "desc_p",
            "user",
            "unique",
            "initially",
            "column",
            "both",
            "some",
            "as",
            "any",
            "only",
            "deferrable",
            "null_p",
            "current_time",
            "true_p",
            "table",
            "case",
            "trailing",
            "variadic",
            "for",
            "on",
            "distinct",
            "false_p",
            "not",
            "constraint",
            "current_timestamp",
            "returning",
            "primary",
            "intersect",
            "having",
            "analyze",
            "current_user",
            "and",
            "cast",
            "symmetric",
            "using",
            "order",
            "current_catalog",
        }

        UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren)

        # DuckDB doesn't generally support CREATE TABLE .. properties
        # https://duckdb.org/docs/sql/statements/create_table.html
        PROPERTIES_LOCATION = {
            prop: exp.Properties.Location.UNSUPPORTED
            for prop in generator.Generator.PROPERTIES_LOCATION
        }

        # There are a few exceptions (e.g. temporary tables) which are supported or
        # can be transpiled to DuckDB, so we explicitly override them accordingly
        PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA
        PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE
        PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS

        def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str:
            return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ))

        def strtotime_sql(self, expression: exp.StrToTime) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)"
            return str_to_time_sql(self, expression)

        def strtodate_sql(self, expression: exp.StrToDate) -> str:
            if expression.args.get("safe"):
                formatted_time = self.format_time(expression)
                return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)"
            return f"CAST({str_to_time_sql(self, expression)} AS DATE)"

        def parsejson_sql(self, expression: exp.ParseJSON) -> str:
            arg = expression.this
            if expression.args.get("safe"):
                return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null()))
            return self.func("JSON", arg)

        def timefromparts_sql(self, expression: exp.TimeFromParts) -> str:
            nano = expression.args.get("nano")
            if nano is not None:
                expression.set(
                    "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0)
                )

            return rename_func("MAKE_TIME")(self, expression)

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            sec = expression.args["sec"]

            milli = expression.args.get("milli")
            if milli is not None:
                sec += milli.pop() / exp.Literal.number(1000.0)

            nano = expression.args.get("nano")
            if nano is not None:
                sec += nano.pop() / exp.Literal.number(1000000000.0)

            if milli or nano:
                expression.set("sec", sec)

            return rename_func("MAKE_TIMESTAMP")(self, expression)

        def tablesample_sql(
            self,
            expression: exp.TableSample,
            tablesample_keyword: t.Optional[str] = None,
        ) -> str:
            if not isinstance(expression.parent, exp.Select):
                # This sample clause only applies to a single source, not the entire resulting relation
                tablesample_keyword = "TABLESAMPLE"

            if expression.args.get("size"):
                method = expression.args.get("method")
                if method and method.name.upper() != "RESERVOIR":
                    self.unsupported(
                        f"Sampling method {method} is not supported with a discrete sample count, "
                        "defaulting to reservoir sampling"
                    )
                    expression.set("method", exp.var("RESERVOIR"))

            return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)

        def interval_sql(self, expression: exp.Interval) -> str:
            multiplier: t.Optional[int] = None
            unit = expression.text("unit").lower()

            if unit.startswith("week"):
                multiplier = 7
            if unit.startswith("quarter"):
                multiplier = 90

            if multiplier:
                return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})"

            return super().interval_sql(expression)

        def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str:
            if isinstance(expression.parent, exp.UserDefinedFunction):
                return self.sql(expression, "this")
            return super().columndef_sql(expression, sep)

        def join_sql(self, expression: exp.Join) -> str:
            if (
                expression.side == "LEFT"
                and not expression.args.get("on")
                and isinstance(expression.this, exp.Unnest)
            ):
                # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause
                # DuckDB doesn't, but we can just add a dummy ON clause that is always true
                return super().join_sql(expression.on(exp.true()))

            return super().join_sql(expression)

        def generateseries_sql(self, expression: exp.GenerateSeries) -> str:
            # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b)
            if expression.args.get("is_end_exclusive"):
                return rename_func("RANGE")(self, expression)

            return self.function_fallback_sql(expression)

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            if isinstance(this, exp.Array):
                this.replace(exp.paren(this))

            bracket = super().bracket_sql(expression)

            if not expression.args.get("returns_list_for_maps"):
                if not this.type:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    this = annotate_types(this)

                if this.is_type(exp.DataType.Type.MAP):
                    bracket = f"({bracket})[1]"

            return bracket

        def withingroup_sql(self, expression: exp.WithinGroup) -> str:
            expression_sql = self.sql(expression, "expression")

            func = expression.this
            if isinstance(func, exp.PERCENTILES):
                # Make the order key the first arg and slide the fraction to the right
                # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions
                order_col = expression.find(exp.Ordered)
                if order_col:
                    func.set("expression", func.this)
                    func.set("this", order_col.this)

            this = self.sql(expression, "this").rstrip(")")

            return f"{this}{expression_sql})"

        def length_sql(self, expression: exp.Length) -> str:
            arg = expression.this

            # Dialects like BQ and Snowflake also accept binary values as args, so
            # DDB will attempt to infer the type or resort to case/when resolution
            if not expression.args.get("binary") or arg.is_string:
                return self.func("LENGTH", arg)

            if not arg.type:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg)

            if arg.is_type(*exp.DataType.TEXT_TYPES):
                return self.func("LENGTH", arg)

            # We need these casts to make duckdb's static type checker happy
            blob = exp.cast(arg, exp.DataType.Type.VARBINARY)
            varchar = exp.cast(arg, exp.DataType.Type.VARCHAR)

            case = (
                exp.case(self.func("TYPEOF", arg))
                .when(
                    "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar])
                )  # anonymous to break length_sql recursion
                .when("'BLOB'", self.func("OCTET_LENGTH", blob))
            )

            return self.sql(case)

        def objectinsert_sql(self, expression: exp.ObjectInsert) -> str:
            this = expression.this
            key = expression.args.get("key")
            key_sql = key.name if isinstance(key, exp.Expression) else ""
            value_sql = self.sql(expression, "value")

            kv_sql = f"{key_sql} := {value_sql}"

            # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake
            # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB
            if isinstance(this, exp.Struct) and not this.expressions:
                return self.func("STRUCT_PACK", kv_sql)

            return self.func("STRUCT_INSERT", this, kv_sql)

        def unnest_sql(self, expression: exp.Unnest) -> str:
            explode_array = expression.args.get("explode_array")
            if explode_array:
                # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct
                # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))"
                expression.expressions.append(
                    exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2))
                )

                # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB
                alias = expression.args.get("alias")
                if alias:
                    expression.set("alias", None)
                    alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0))

                unnest_sql = super().unnest_sql(expression)
                select = exp.Select(expressions=[unnest_sql]).subquery(alias)
                return self.sql(select)

            return super().unnest_sql(expression)

        def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str:
            if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS):
                # DuckDB should render IGNORE NULLS only for the general-purpose
                # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...)
                return super().ignorenulls_sql(expression)

            return self.sql(expression, "this")

        def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
            this = self.sql(expression, "this")
            null_text = self.sql(expression, "null")

            if null_text:
                this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))"

            return self.func("ARRAY_TO_STRING", this, expression.expression)
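The transforms above are easiest to follow end to end through sqlglot's public transpile API. A minimal sketch (the input SQL strings are illustrative, and the outputs in the comments are indicative of what the rules above produce, assuming a recent sqlglot release):

import sqlglot

# BigQuery inline STRUCTs go through _struct_sql: outside of a STRUCT cast
# they render as DuckDB struct literals.
print(sqlglot.transpile("SELECT STRUCT(1 AS a)", read="bigquery", write="duckdb")[0])
# SELECT {'a': 1}

# RANGE is parsed with _build_generate_series(end_exclusive=True), so the
# end-exclusive semantics survive a round trip via generateseries_sql.
print(sqlglot.transpile("SELECT RANGE(1, 5)", read="duckdb", write="duckdb")[0])
# SELECT RANGE(1, 5)

# EPOCH_MS is parsed into exp.UnixToTime with millisecond scale and is
# regenerated by _unix_to_time_sql.
print(sqlglot.transpile("SELECT EPOCH_MS(1618088028295)", read="duckdb", write="duckdb")[0])
# SELECT EPOCH_MS(1618088028295)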
Default NULL ordering method to use if not explicitly set. Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last".
A NULL arg in CONCAT yields NULL by default, but in some dialects it yields an empty string.
Whether ORDER BY ALL is supported (expands to all the selected columns), as in DuckDB and Spark3/Databricks.
Whether expressions such as x::INT[5] should be parsed as fixed-size array defs/casts, e.g. in DuckDB. In dialects that don't support fixed-size arrays, such as Snowflake, this should be interpreted as a subscript/index operator.
Whether failing to parse a JSON path expression using the JSONPath dialect will log a warning.
Specifies the strategy according to which identifiers should be normalized.
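For example, the fixed-size array setting above determines how a trailing [5] is parsed. A minimal sketch (identifiers are illustrative; output may vary by sqlglot version):

    import sqlglot

    # In DuckDB, x::INT[5] is a cast to a fixed-size array type, so it
    # round-trips as a CAST rather than a subscript.
    print(sqlglot.transpile("SELECT x::INT[5] FROM t", read="duckdb", write="duckdb")[0])
    # e.g. SELECT CAST(x AS INT[5]) FROM t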
272 def to_json_path(self, path: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 273 if isinstance(path, exp.Literal): 274 # DuckDB also supports the JSON pointer syntax, where every path starts with a `/`. 275 # Additionally, it allows accessing the back of lists using the `[#-i]` syntax. 276 # This check ensures we'll avoid trying to parse these as JSON paths, which can 277 # either result in a noisy warning or in an invalid representation of the path. 278 path_text = path.name 279 if path_text.startswith("/") or "[#" in path_text: 280 return path 281 282 return super().to_json_path(path)
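A short illustration of this check (the column name is illustrative; output may vary by sqlglot version):

    import sqlglot

    # A leading "/" marks a DuckDB JSON pointer, so the literal is passed
    # through verbatim instead of being parsed as a JSONPath expression.
    print(sqlglot.transpile("SELECT col -> '/a/b' FROM t", read="duckdb", write="duckdb")[0])
    # e.g. SELECT col -> '/a/b' FROM t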
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- NORMALIZE_FUNCTIONS
- LOG_BASE_FIRST
- TYPED_DIVISION
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- UNESCAPED_SEQUENCES
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- FORCE_EARLY_ALIAS_REF_EXPANSION
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- SET_OP_DISTINCT_BY_DEFAULT
- CREATABLE_KIND_MAPPING
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
284 class Tokenizer(tokens.Tokenizer): 285 HEREDOC_STRINGS = ["$"] 286 287 HEREDOC_TAG_IS_IDENTIFIER = True 288 HEREDOC_STRING_ALTERNATIVE = TokenType.PARAMETER 289 290 KEYWORDS = { 291 **tokens.Tokenizer.KEYWORDS, 292 "//": TokenType.DIV, 293 "**": TokenType.DSTAR, 294 "ATTACH": TokenType.COMMAND, 295 "BINARY": TokenType.VARBINARY, 296 "BITSTRING": TokenType.BIT, 297 "BPCHAR": TokenType.TEXT, 298 "CHAR": TokenType.TEXT, 299 "CHARACTER VARYING": TokenType.TEXT, 300 "EXCLUDE": TokenType.EXCEPT, 301 "LOGICAL": TokenType.BOOLEAN, 302 "ONLY": TokenType.ONLY, 303 "PIVOT_WIDER": TokenType.PIVOT, 304 "POSITIONAL": TokenType.POSITIONAL, 305 "SIGNED": TokenType.INT, 306 "STRING": TokenType.TEXT, 307 "SUMMARIZE": TokenType.SUMMARIZE, 308 "TIMESTAMP_S": TokenType.TIMESTAMP_S, 309 "TIMESTAMP_MS": TokenType.TIMESTAMP_MS, 310 "TIMESTAMP_NS": TokenType.TIMESTAMP_NS, 311 "TIMESTAMP_US": TokenType.TIMESTAMP, 312 "UBIGINT": TokenType.UBIGINT, 313 "UINTEGER": TokenType.UINT, 314 "USMALLINT": TokenType.USMALLINT, 315 "UTINYINT": TokenType.UTINYINT, 316 "VARCHAR": TokenType.TEXT, 317 } 318 KEYWORDS.pop("/*+") 319 320 SINGLE_TOKENS = { 321 **tokens.Tokenizer.SINGLE_TOKENS, 322 "$": TokenType.PARAMETER, 323 }
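The keyword table above folds several DuckDB type aliases into canonical token types, which a round-trip makes visible. A minimal sketch (output may vary by sqlglot version):

    import sqlglot

    # STRING and LOGICAL are tokenized as TEXT and BOOLEAN respectively.
    print(sqlglot.transpile("CREATE TABLE t (a STRING, b LOGICAL)", read="duckdb", write="duckdb")[0])
    # e.g. CREATE TABLE t (a TEXT, b BOOLEAN)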
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- IDENTIFIER_ESCAPES
- QUOTES
- STRING_ESCAPES
- VAR_SINGLE_TOKENS
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
325 class Parser(parser.Parser): 326 BITWISE = { 327 **parser.Parser.BITWISE, 328 TokenType.TILDA: exp.RegexpLike, 329 } 330 BITWISE.pop(TokenType.CARET) 331 332 EXPONENT = { 333 **parser.Parser.EXPONENT, 334 TokenType.CARET: exp.Pow, 335 TokenType.DSTAR: exp.Pow, 336 } 337 338 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "STRUCT_PACK"} 339 340 FUNCTIONS = { 341 **parser.Parser.FUNCTIONS, 342 "ARRAY_REVERSE_SORT": _build_sort_array_desc, 343 "ARRAY_SORT": exp.SortArray.from_arg_list, 344 "DATEDIFF": _build_date_diff, 345 "DATE_DIFF": _build_date_diff, 346 "DATE_TRUNC": date_trunc_to_time, 347 "DATETRUNC": date_trunc_to_time, 348 "DECODE": lambda args: exp.Decode( 349 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 350 ), 351 "ENCODE": lambda args: exp.Encode( 352 this=seq_get(args, 0), charset=exp.Literal.string("utf-8") 353 ), 354 "EPOCH": exp.TimeToUnix.from_arg_list, 355 "EPOCH_MS": lambda args: exp.UnixToTime( 356 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 357 ), 358 "JSON": exp.ParseJSON.from_arg_list, 359 "JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract), 360 "JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar), 361 "LIST_HAS": exp.ArrayContains.from_arg_list, 362 "LIST_REVERSE_SORT": _build_sort_array_desc, 363 "LIST_SORT": exp.SortArray.from_arg_list, 364 "LIST_VALUE": lambda args: exp.Array(expressions=args), 365 "MAKE_TIME": exp.TimeFromParts.from_arg_list, 366 "MAKE_TIMESTAMP": _build_make_timestamp, 367 "MEDIAN": lambda args: exp.PercentileCont( 368 this=seq_get(args, 0), expression=exp.Literal.number(0.5) 369 ), 370 "QUANTILE_CONT": exp.PercentileCont.from_arg_list, 371 "QUANTILE_DISC": exp.PercentileDisc.from_arg_list, 372 "REGEXP_EXTRACT": build_regexp_extract, 373 "REGEXP_MATCHES": exp.RegexpLike.from_arg_list, 374 "REGEXP_REPLACE": lambda args: exp.RegexpReplace( 375 this=seq_get(args, 0), 376 expression=seq_get(args, 1), 377 replacement=seq_get(args, 2), 378 modifiers=seq_get(args, 3), 379 ), 380 "STRFTIME": build_formatted_time(exp.TimeToStr, "duckdb"), 381 "STRING_SPLIT": exp.Split.from_arg_list, 382 "STRING_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 383 "STRING_TO_ARRAY": exp.Split.from_arg_list, 384 "STRPTIME": build_formatted_time(exp.StrToTime, "duckdb"), 385 "STRUCT_PACK": exp.Struct.from_arg_list, 386 "STR_SPLIT": exp.Split.from_arg_list, 387 "STR_SPLIT_REGEX": exp.RegexpSplit.from_arg_list, 388 "TO_TIMESTAMP": exp.UnixToTime.from_arg_list, 389 "UNNEST": exp.Explode.from_arg_list, 390 "XOR": binary_from_function(exp.BitwiseXor), 391 "GENERATE_SERIES": _build_generate_series(), 392 "RANGE": _build_generate_series(end_exclusive=True), 393 } 394 395 FUNCTIONS.pop("DATE_SUB") 396 FUNCTIONS.pop("GLOB") 397 398 FUNCTION_PARSERS = parser.Parser.FUNCTION_PARSERS.copy() 399 FUNCTION_PARSERS.pop("DECODE") 400 401 NO_PAREN_FUNCTION_PARSERS = { 402 **parser.Parser.NO_PAREN_FUNCTION_PARSERS, 403 "MAP": lambda self: self._parse_map(), 404 } 405 406 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 407 TokenType.SEMI, 408 TokenType.ANTI, 409 } 410 411 PLACEHOLDER_PARSERS = { 412 **parser.Parser.PLACEHOLDER_PARSERS, 413 TokenType.PARAMETER: lambda self: ( 414 self.expression(exp.Placeholder, this=self._prev.text) 415 if self._match(TokenType.NUMBER) or self._match_set(self.ID_VAR_TOKENS) 416 else None 417 ), 418 } 419 420 TYPE_CONVERTERS = { 421 # https://duckdb.org/docs/sql/data_types/numeric 422 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=18, 
scale=3), 423 # https://duckdb.org/docs/sql/data_types/text 424 exp.DataType.Type.TEXT: lambda dtype: exp.DataType.build("TEXT"), 425 } 426 427 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 428 # https://duckdb.org/docs/sql/samples.html 429 sample = super()._parse_table_sample(as_modifier=as_modifier) 430 if sample and not sample.args.get("method"): 431 if sample.args.get("size"): 432 sample.set("method", exp.var("RESERVOIR")) 433 else: 434 sample.set("method", exp.var("SYSTEM")) 435 436 return sample 437 438 def _parse_bracket( 439 self, this: t.Optional[exp.Expression] = None 440 ) -> t.Optional[exp.Expression]: 441 bracket = super()._parse_bracket(this) 442 if isinstance(bracket, exp.Bracket): 443 bracket.set("returns_list_for_maps", True) 444 445 return bracket 446 447 def _parse_map(self) -> exp.ToMap | exp.Map: 448 if self._match(TokenType.L_BRACE, advance=False): 449 return self.expression(exp.ToMap, this=self._parse_bracket()) 450 451 args = self._parse_wrapped_csv(self._parse_assignment) 452 return self.expression(exp.Map, keys=seq_get(args, 0), values=seq_get(args, 1)) 453 454 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 455 return self._parse_field_def() 456 457 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 458 if len(aggregations) == 1: 459 return super()._pivot_column_names(aggregations) 460 return pivot_column_names(aggregations, dialect="duckdb")
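As an example of the FUNCTIONS table in action, MEDIAN is parsed into a PercentileCont node with an implicit 0.5 fraction, which the generator renders back as QUANTILE_CONT. A minimal sketch (output may vary by sqlglot version):

    import sqlglot

    print(sqlglot.transpile("SELECT MEDIAN(x) FROM t", read="duckdb", write="duckdb")[0])
    # e.g. SELECT QUANTILE_CONT(x, 0.5) FROM t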
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
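These options can be forwarded through the top-level API rather than by constructing a Parser directly. A minimal sketch:

    import sqlglot
    from sqlglot.errors import ErrorLevel

    # Parser options such as error_level are passed through parse_one/transpile.
    tree = sqlglot.parse_one("SELECT 1", read="duckdb", error_level=ErrorLevel.RAISE)
    print(tree.sql(dialect="duckdb"))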
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- TERM
- FACTOR
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- RANGE_PARSERS
- PROPERTY_PARSERS
- CONSTRAINT_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- LOG_DEFAULTS_TO_LN
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
462 class Generator(generator.Generator): 463 PARAMETER_TOKEN = "$" 464 NAMED_PLACEHOLDER_TOKEN = "$" 465 JOIN_HINTS = False 466 TABLE_HINTS = False 467 QUERY_HINTS = False 468 LIMIT_FETCH = "LIMIT" 469 STRUCT_DELIMITER = ("(", ")") 470 RENAME_TABLE_WITH_DB = False 471 NVL2_SUPPORTED = False 472 SEMI_ANTI_JOIN_WITH_SIDE = False 473 TABLESAMPLE_KEYWORDS = "USING SAMPLE" 474 TABLESAMPLE_SEED_KEYWORD = "REPEATABLE" 475 LAST_DAY_SUPPORTS_DATE_PART = False 476 JSON_KEY_VALUE_PAIR_SEP = "," 477 IGNORE_NULLS_IN_FUNC = True 478 JSON_PATH_BRACKETED_KEY_SUPPORTED = False 479 SUPPORTS_CREATE_TABLE_LIKE = False 480 MULTI_ARG_DISTINCT = False 481 CAN_IMPLEMENT_ARRAY_ANY = True 482 SUPPORTS_TO_NUMBER = False 483 COPY_HAS_INTO_KEYWORD = False 484 STAR_EXCEPT = "EXCLUDE" 485 PAD_FILL_PATTERN_IS_REQUIRED = True 486 ARRAY_CONCAT_IS_VAR_LEN = False 487 488 TRANSFORMS = { 489 **generator.Generator.TRANSFORMS, 490 exp.ApproxDistinct: approx_count_distinct_sql, 491 exp.Array: inline_array_unless_query, 492 exp.ArrayContainsAll: rename_func("ARRAY_HAS_ALL"), 493 exp.ArrayFilter: rename_func("LIST_FILTER"), 494 exp.ArraySize: rename_func("ARRAY_LENGTH"), 495 exp.ArgMax: arg_max_or_min_no_count("ARG_MAX"), 496 exp.ArgMin: arg_max_or_min_no_count("ARG_MIN"), 497 exp.ArraySort: _array_sort_sql, 498 exp.ArraySum: rename_func("LIST_SUM"), 499 exp.BitwiseXor: rename_func("XOR"), 500 exp.CommentColumnConstraint: no_comment_column_constraint_sql, 501 exp.CurrentDate: lambda *_: "CURRENT_DATE", 502 exp.CurrentTime: lambda *_: "CURRENT_TIME", 503 exp.CurrentTimestamp: lambda *_: "CURRENT_TIMESTAMP", 504 exp.DayOfMonth: rename_func("DAYOFMONTH"), 505 exp.DayOfWeek: rename_func("DAYOFWEEK"), 506 exp.DayOfWeekIso: rename_func("ISODOW"), 507 exp.DayOfYear: rename_func("DAYOFYEAR"), 508 exp.DataType: _datatype_sql, 509 exp.Date: _date_sql, 510 exp.DateAdd: _date_delta_sql, 511 exp.DateFromParts: rename_func("MAKE_DATE"), 512 exp.DateSub: _date_delta_sql, 513 exp.DateDiff: _date_diff_sql, 514 exp.DateStrToDate: datestrtodate_sql, 515 exp.Datetime: no_datetime_sql, 516 exp.DatetimeSub: _date_delta_sql, 517 exp.DatetimeAdd: _date_delta_sql, 518 exp.DateToDi: lambda self, 519 e: f"CAST(STRFTIME({self.sql(e, 'this')}, {DuckDB.DATEINT_FORMAT}) AS INT)", 520 exp.Decode: lambda self, e: encode_decode_sql(self, e, "DECODE", replace=False), 521 exp.DiToDate: lambda self, 522 e: f"CAST(STRPTIME(CAST({self.sql(e, 'this')} AS TEXT), {DuckDB.DATEINT_FORMAT}) AS DATE)", 523 exp.Encode: lambda self, e: encode_decode_sql(self, e, "ENCODE", replace=False), 524 exp.GenerateDateArray: _generate_datetime_array_sql, 525 exp.GenerateTimestampArray: _generate_datetime_array_sql, 526 exp.Explode: rename_func("UNNEST"), 527 exp.IntDiv: lambda self, e: self.binary(e, "//"), 528 exp.IsInf: rename_func("ISINF"), 529 exp.IsNan: rename_func("ISNAN"), 530 exp.JSONExtract: _arrow_json_extract_sql, 531 exp.JSONExtractScalar: _arrow_json_extract_sql, 532 exp.JSONFormat: _json_format_sql, 533 exp.LogicalOr: rename_func("BOOL_OR"), 534 exp.LogicalAnd: rename_func("BOOL_AND"), 535 exp.MD5Digest: lambda self, e: self.func("UNHEX", self.func("MD5", e.this)), 536 exp.MonthsBetween: lambda self, e: self.func( 537 "DATEDIFF", 538 "'month'", 539 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP, copy=True), 540 exp.cast(e.this, exp.DataType.Type.TIMESTAMP, copy=True), 541 ), 542 exp.PercentileCont: rename_func("QUANTILE_CONT"), 543 exp.PercentileDisc: rename_func("QUANTILE_DISC"), 544 # DuckDB doesn't allow qualified columns inside of PIVOT expressions. 
545 # See: https://github.com/duckdb/duckdb/blob/671faf92411182f81dce42ac43de8bfb05d9909e/src/planner/binder/tableref/bind_pivot.cpp#L61-L62 546 exp.Pivot: transforms.preprocess([transforms.unqualify_columns]), 547 exp.RegexpExtract: regexp_extract_sql, 548 exp.RegexpReplace: lambda self, e: self.func( 549 "REGEXP_REPLACE", 550 e.this, 551 e.expression, 552 e.args.get("replacement"), 553 e.args.get("modifiers"), 554 ), 555 exp.RegexpLike: rename_func("REGEXP_MATCHES"), 556 exp.RegexpSplit: rename_func("STR_SPLIT_REGEX"), 557 exp.Return: lambda self, e: self.sql(e, "this"), 558 exp.ReturnsProperty: lambda self, e: "TABLE" if isinstance(e.this, exp.Schema) else "", 559 exp.Rand: rename_func("RANDOM"), 560 exp.SafeDivide: no_safe_divide_sql, 561 exp.SHA: rename_func("SHA1"), 562 exp.SHA2: sha256_sql, 563 exp.Split: rename_func("STR_SPLIT"), 564 exp.SortArray: _sort_array_sql, 565 exp.StrPosition: str_position_sql, 566 exp.StrToUnix: lambda self, e: self.func( 567 "EPOCH", self.func("STRPTIME", e.this, self.format_time(e)) 568 ), 569 exp.Struct: _struct_sql, 570 exp.Transform: rename_func("LIST_TRANSFORM"), 571 exp.TimeAdd: _date_delta_sql, 572 exp.Time: no_time_sql, 573 exp.TimeDiff: _timediff_sql, 574 exp.Timestamp: no_timestamp_sql, 575 exp.TimestampDiff: lambda self, e: self.func( 576 "DATE_DIFF", exp.Literal.string(e.unit), e.expression, e.this 577 ), 578 exp.TimestampTrunc: timestamptrunc_sql(), 579 exp.TimeStrToDate: lambda self, e: self.sql(exp.cast(e.this, exp.DataType.Type.DATE)), 580 exp.TimeStrToTime: timestrtotime_sql, 581 exp.TimeStrToUnix: lambda self, e: self.func( 582 "EPOCH", exp.cast(e.this, exp.DataType.Type.TIMESTAMP) 583 ), 584 exp.TimeToStr: lambda self, e: self.func("STRFTIME", e.this, self.format_time(e)), 585 exp.TimeToUnix: rename_func("EPOCH"), 586 exp.TsOrDiToDi: lambda self, 587 e: f"CAST(SUBSTR(REPLACE(CAST({self.sql(e, 'this')} AS TEXT), '-', ''), 1, 8) AS INT)", 588 exp.TsOrDsAdd: _date_delta_sql, 589 exp.TsOrDsDiff: lambda self, e: self.func( 590 "DATE_DIFF", 591 f"'{e.args.get('unit') or 'DAY'}'", 592 exp.cast(e.expression, exp.DataType.Type.TIMESTAMP), 593 exp.cast(e.this, exp.DataType.Type.TIMESTAMP), 594 ), 595 exp.UnixToStr: lambda self, e: self.func( 596 "STRFTIME", self.func("TO_TIMESTAMP", e.this), self.format_time(e) 597 ), 598 exp.DatetimeTrunc: lambda self, e: self.func( 599 "DATE_TRUNC", unit_to_str(e), exp.cast(e.this, exp.DataType.Type.DATETIME) 600 ), 601 exp.UnixToTime: _unix_to_time_sql, 602 exp.UnixToTimeStr: lambda self, e: f"CAST(TO_TIMESTAMP({self.sql(e, 'this')}) AS TEXT)", 603 exp.VariancePop: rename_func("VAR_POP"), 604 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 605 exp.Xor: bool_xor_sql, 606 } 607 608 SUPPORTED_JSON_PATH_PARTS = { 609 exp.JSONPathKey, 610 exp.JSONPathRoot, 611 exp.JSONPathSubscript, 612 exp.JSONPathWildcard, 613 } 614 615 TYPE_MAPPING = { 616 **generator.Generator.TYPE_MAPPING, 617 exp.DataType.Type.BINARY: "BLOB", 618 exp.DataType.Type.BPCHAR: "TEXT", 619 exp.DataType.Type.CHAR: "TEXT", 620 exp.DataType.Type.FLOAT: "REAL", 621 exp.DataType.Type.NCHAR: "TEXT", 622 exp.DataType.Type.NVARCHAR: "TEXT", 623 exp.DataType.Type.UINT: "UINTEGER", 624 exp.DataType.Type.VARBINARY: "BLOB", 625 exp.DataType.Type.ROWVERSION: "BLOB", 626 exp.DataType.Type.VARCHAR: "TEXT", 627 exp.DataType.Type.TIMESTAMPNTZ: "TIMESTAMP", 628 exp.DataType.Type.TIMESTAMP_S: "TIMESTAMP_S", 629 exp.DataType.Type.TIMESTAMP_MS: "TIMESTAMP_MS", 630 exp.DataType.Type.TIMESTAMP_NS: "TIMESTAMP_NS", 631 } 632 633 # 
https://github.com/duckdb/duckdb/blob/ff7f24fd8e3128d94371827523dae85ebaf58713/third_party/libpg_query/grammar/keywords/reserved_keywords.list#L1-L77 634 RESERVED_KEYWORDS = { 635 "array", 636 "analyse", 637 "union", 638 "all", 639 "when", 640 "in_p", 641 "default", 642 "create_p", 643 "window", 644 "asymmetric", 645 "to", 646 "else", 647 "localtime", 648 "from", 649 "end_p", 650 "select", 651 "current_date", 652 "foreign", 653 "with", 654 "grant", 655 "session_user", 656 "or", 657 "except", 658 "references", 659 "fetch", 660 "limit", 661 "group_p", 662 "leading", 663 "into", 664 "collate", 665 "offset", 666 "do", 667 "then", 668 "localtimestamp", 669 "check_p", 670 "lateral_p", 671 "current_role", 672 "where", 673 "asc_p", 674 "placing", 675 "desc_p", 676 "user", 677 "unique", 678 "initially", 679 "column", 680 "both", 681 "some", 682 "as", 683 "any", 684 "only", 685 "deferrable", 686 "null_p", 687 "current_time", 688 "true_p", 689 "table", 690 "case", 691 "trailing", 692 "variadic", 693 "for", 694 "on", 695 "distinct", 696 "false_p", 697 "not", 698 "constraint", 699 "current_timestamp", 700 "returning", 701 "primary", 702 "intersect", 703 "having", 704 "analyze", 705 "current_user", 706 "and", 707 "cast", 708 "symmetric", 709 "using", 710 "order", 711 "current_catalog", 712 } 713 714 UNWRAPPED_INTERVAL_VALUES = (exp.Literal, exp.Paren) 715 716 # DuckDB doesn't generally support CREATE TABLE .. properties 717 # https://duckdb.org/docs/sql/statements/create_table.html 718 PROPERTIES_LOCATION = { 719 prop: exp.Properties.Location.UNSUPPORTED 720 for prop in generator.Generator.PROPERTIES_LOCATION 721 } 722 723 # There are a few exceptions (e.g. temporary tables) which are supported or 724 # can be transpiled to DuckDB, so we explicitly override them accordingly 725 PROPERTIES_LOCATION[exp.LikeProperty] = exp.Properties.Location.POST_SCHEMA 726 PROPERTIES_LOCATION[exp.TemporaryProperty] = exp.Properties.Location.POST_CREATE 727 PROPERTIES_LOCATION[exp.ReturnsProperty] = exp.Properties.Location.POST_ALIAS 728 729 def fromiso8601timestamp_sql(self, expression: exp.FromISO8601Timestamp) -> str: 730 return self.sql(exp.cast(expression.this, exp.DataType.Type.TIMESTAMPTZ)) 731 732 def strtotime_sql(self, expression: exp.StrToTime) -> str: 733 if expression.args.get("safe"): 734 formatted_time = self.format_time(expression) 735 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS TIMESTAMP)" 736 return str_to_time_sql(self, expression) 737 738 def strtodate_sql(self, expression: exp.StrToDate) -> str: 739 if expression.args.get("safe"): 740 formatted_time = self.format_time(expression) 741 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 742 return f"CAST({str_to_time_sql(self, expression)} AS DATE)" 743 744 def parsejson_sql(self, expression: exp.ParseJSON) -> str: 745 arg = expression.this 746 if expression.args.get("safe"): 747 return self.sql(exp.case().when(exp.func("json_valid", arg), arg).else_(exp.null())) 748 return self.func("JSON", arg) 749 750 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 751 nano = expression.args.get("nano") 752 if nano is not None: 753 expression.set( 754 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 755 ) 756 757 return rename_func("MAKE_TIME")(self, expression) 758 759 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 760 sec = expression.args["sec"] 761 762 milli = expression.args.get("milli") 763 if milli is not None: 764 
sec += milli.pop() / exp.Literal.number(1000.0) 765 766 nano = expression.args.get("nano") 767 if nano is not None: 768 sec += nano.pop() / exp.Literal.number(1000000000.0) 769 770 if milli or nano: 771 expression.set("sec", sec) 772 773 return rename_func("MAKE_TIMESTAMP")(self, expression) 774 775 def tablesample_sql( 776 self, 777 expression: exp.TableSample, 778 tablesample_keyword: t.Optional[str] = None, 779 ) -> str: 780 if not isinstance(expression.parent, exp.Select): 781 # This sample clause only applies to a single source, not the entire resulting relation 782 tablesample_keyword = "TABLESAMPLE" 783 784 if expression.args.get("size"): 785 method = expression.args.get("method") 786 if method and method.name.upper() != "RESERVOIR": 787 self.unsupported( 788 f"Sampling method {method} is not supported with a discrete sample count, " 789 "defaulting to reservoir sampling" 790 ) 791 expression.set("method", exp.var("RESERVOIR")) 792 793 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword) 794 795 def interval_sql(self, expression: exp.Interval) -> str: 796 multiplier: t.Optional[int] = None 797 unit = expression.text("unit").lower() 798 799 if unit.startswith("week"): 800 multiplier = 7 801 if unit.startswith("quarter"): 802 multiplier = 90 803 804 if multiplier: 805 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 806 807 return super().interval_sql(expression) 808 809 def columndef_sql(self, expression: exp.ColumnDef, sep: str = " ") -> str: 810 if isinstance(expression.parent, exp.UserDefinedFunction): 811 return self.sql(expression, "this") 812 return super().columndef_sql(expression, sep) 813 814 def join_sql(self, expression: exp.Join) -> str: 815 if ( 816 expression.side == "LEFT" 817 and not expression.args.get("on") 818 and isinstance(expression.this, exp.Unnest) 819 ): 820 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 821 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 822 return super().join_sql(expression.on(exp.true())) 823 824 return super().join_sql(expression) 825 826 def generateseries_sql(self, expression: exp.GenerateSeries) -> str: 827 # GENERATE_SERIES(a, b) -> [a, b], RANGE(a, b) -> [a, b) 828 if expression.args.get("is_end_exclusive"): 829 return rename_func("RANGE")(self, expression) 830 831 return self.function_fallback_sql(expression) 832 833 def bracket_sql(self, expression: exp.Bracket) -> str: 834 this = expression.this 835 if isinstance(this, exp.Array): 836 this.replace(exp.paren(this)) 837 838 bracket = super().bracket_sql(expression) 839 840 if not expression.args.get("returns_list_for_maps"): 841 if not this.type: 842 from sqlglot.optimizer.annotate_types import annotate_types 843 844 this = annotate_types(this) 845 846 if this.is_type(exp.DataType.Type.MAP): 847 bracket = f"({bracket})[1]" 848 849 return bracket 850 851 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 852 expression_sql = self.sql(expression, "expression") 853 854 func = expression.this 855 if isinstance(func, exp.PERCENTILES): 856 # Make the order key the first arg and slide the fraction to the right 857 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 858 order_col = expression.find(exp.Ordered) 859 if order_col: 860 func.set("expression", func.this) 861 func.set("this", order_col.this) 862 863 this = self.sql(expression, "this").rstrip(")") 864 865 return f"{this}{expression_sql})" 866 867 def 
length_sql(self, expression: exp.Length) -> str: 868 arg = expression.this 869 870 # Dialects like BQ and Snowflake also accept binary values as args, so 871 # DDB will attempt to infer the type or resort to case/when resolution 872 if not expression.args.get("binary") or arg.is_string: 873 return self.func("LENGTH", arg) 874 875 if not arg.type: 876 from sqlglot.optimizer.annotate_types import annotate_types 877 878 arg = annotate_types(arg) 879 880 if arg.is_type(*exp.DataType.TEXT_TYPES): 881 return self.func("LENGTH", arg) 882 883 # We need these casts to make duckdb's static type checker happy 884 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 885 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 886 887 case = ( 888 exp.case(self.func("TYPEOF", arg)) 889 .when( 890 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 891 ) # anonymous to break length_sql recursion 892 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 893 ) 894 895 return self.sql(case) 896 897 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 898 this = expression.this 899 key = expression.args.get("key") 900 key_sql = key.name if isinstance(key, exp.Expression) else "" 901 value_sql = self.sql(expression, "value") 902 903 kv_sql = f"{key_sql} := {value_sql}" 904 905 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 906 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 907 if isinstance(this, exp.Struct) and not this.expressions: 908 return self.func("STRUCT_PACK", kv_sql) 909 910 return self.func("STRUCT_INSERT", this, kv_sql) 911 912 def unnest_sql(self, expression: exp.Unnest) -> str: 913 explode_array = expression.args.get("explode_array") 914 if explode_array: 915 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 916 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 917 expression.expressions.append( 918 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 919 ) 920 921 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 922 alias = expression.args.get("alias") 923 if alias: 924 expression.set("alias", None) 925 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 926 927 unnest_sql = super().unnest_sql(expression) 928 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 929 return self.sql(select) 930 931 return super().unnest_sql(expression) 932 933 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 934 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 935 # DuckDB should render IGNORE NULLS only for the general-purpose 936 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 937 return super().ignorenulls_sql(expression) 938 939 return self.sql(expression, "this") 940 941 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 942 this = self.sql(expression, "this") 943 null_text = self.sql(expression, "null") 944 945 if null_text: 946 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 947 948 return self.func("ARRAY_TO_STRING", this, expression.expression)
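As one end-to-end example of the overrides above, objectinsert_sql falls back to STRUCT_PACK when the input struct is empty. A minimal sketch (output may vary by sqlglot version):

    import sqlglot

    # STRUCT_INSERT({}, k := 'v') is not valid DuckDB, so an empty input
    # struct is built with STRUCT_PACK instead.
    print(sqlglot.transpile(
        "SELECT OBJECT_INSERT(OBJECT_CONSTRUCT(), 'k', 'v')",
        read="snowflake",
        write="duckdb",
    )[0])
    # e.g. SELECT STRUCT_PACK(k := 'v')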
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
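These generator options are forwarded by the top-level helpers, so they don't require constructing a Generator directly. A minimal sketch:

    import sqlglot

    # pretty and identify are generator options passed through transpile().
    print(sqlglot.transpile("SELECT a FROM t", write="duckdb", pretty=True, identify=True)[0])
    # e.g.
    # SELECT
    #   "a"
    # FROM "t"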
738 def strtodate_sql(self, expression: exp.StrToDate) -> str: 739 if expression.args.get("safe"): 740 formatted_time = self.format_time(expression) 741 return f"CAST({self.func('TRY_STRPTIME', expression.this, formatted_time)} AS DATE)" 742 return f"CAST({str_to_time_sql(self, expression)} AS DATE)"
750 def timefromparts_sql(self, expression: exp.TimeFromParts) -> str: 751 nano = expression.args.get("nano") 752 if nano is not None: 753 expression.set( 754 "sec", expression.args["sec"] + nano.pop() / exp.Literal.number(1000000000.0) 755 ) 756 757 return rename_func("MAKE_TIME")(self, expression)
759 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 760 sec = expression.args["sec"] 761 762 milli = expression.args.get("milli") 763 if milli is not None: 764 sec += milli.pop() / exp.Literal.number(1000.0) 765 766 nano = expression.args.get("nano") 767 if nano is not None: 768 sec += nano.pop() / exp.Literal.number(1000000000.0) 769 770 if milli or nano: 771 expression.set("sec", sec) 772 773 return rename_func("MAKE_TIMESTAMP")(self, expression)
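A hedged example of the nano handling above, reading Snowflake's TIMESTAMP_FROM_PARTS (the literal values are illustrative; exact output may differ across sqlglot versions):

    import sqlglot

    # Nanoseconds are folded into the seconds argument of MAKE_TIMESTAMP.
    print(sqlglot.transpile(
        "SELECT TIMESTAMP_FROM_PARTS(2024, 1, 15, 12, 30, 45, 123456789)",
        read="snowflake",
        write="duckdb",
    )[0])
    # e.g. SELECT MAKE_TIMESTAMP(2024, 1, 15, 12, 30, 45 + 123456789 / 1000000000.0)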
775 def tablesample_sql( 776 self, 777 expression: exp.TableSample, 778 tablesample_keyword: t.Optional[str] = None, 779 ) -> str: 780 if not isinstance(expression.parent, exp.Select): 781 # This sample clause only applies to a single source, not the entire resulting relation 782 tablesample_keyword = "TABLESAMPLE" 783 784 if expression.args.get("size"): 785 method = expression.args.get("method") 786 if method and method.name.upper() != "RESERVOIR": 787 self.unsupported( 788 f"Sampling method {method} is not supported with a discrete sample count, " 789 "defaulting to reservoir sampling" 790 ) 791 expression.set("method", exp.var("RESERVOIR")) 792 793 return super().tablesample_sql(expression, tablesample_keyword=tablesample_keyword)
795 def interval_sql(self, expression: exp.Interval) -> str: 796 multiplier: t.Optional[int] = None 797 unit = expression.text("unit").lower() 798 799 if unit.startswith("week"): 800 multiplier = 7 801 if unit.startswith("quarter"): 802 multiplier = 90 803 804 if multiplier: 805 return f"({multiplier} * {super().interval_sql(exp.Interval(this=expression.this, unit=exp.var('DAY')))})" 806 807 return super().interval_sql(expression)
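A quick sketch of the unit rewrite above (output may vary by sqlglot version):

    import sqlglot

    # WEEK intervals are rewritten as a multiple of DAY intervals.
    print(sqlglot.transpile("SELECT INTERVAL '2' WEEK", read="duckdb", write="duckdb")[0])
    # e.g. SELECT (7 * INTERVAL '2' DAY)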
814 def join_sql(self, expression: exp.Join) -> str: 815 if ( 816 expression.side == "LEFT" 817 and not expression.args.get("on") 818 and isinstance(expression.this, exp.Unnest) 819 ): 820 # Some dialects support `LEFT JOIN UNNEST(...)` without an explicit ON clause 821 # DuckDB doesn't, but we can just add a dummy ON clause that is always true 822 return super().join_sql(expression.on(exp.true())) 823 824 return super().join_sql(expression)
833 def bracket_sql(self, expression: exp.Bracket) -> str: 834 this = expression.this 835 if isinstance(this, exp.Array): 836 this.replace(exp.paren(this)) 837 838 bracket = super().bracket_sql(expression) 839 840 if not expression.args.get("returns_list_for_maps"): 841 if not this.type: 842 from sqlglot.optimizer.annotate_types import annotate_types 843 844 this = annotate_types(this) 845 846 if this.is_type(exp.DataType.Type.MAP): 847 bracket = f"({bracket})[1]" 848 849 return bracket
851 def withingroup_sql(self, expression: exp.WithinGroup) -> str: 852 expression_sql = self.sql(expression, "expression") 853 854 func = expression.this 855 if isinstance(func, exp.PERCENTILES): 856 # Make the order key the first arg and slide the fraction to the right 857 # https://duckdb.org/docs/sql/aggregates#ordered-set-aggregate-functions 858 order_col = expression.find(exp.Ordered) 859 if order_col: 860 func.set("expression", func.this) 861 func.set("this", order_col.this) 862 863 this = self.sql(expression, "this").rstrip(")") 864 865 return f"{this}{expression_sql})"
867 def length_sql(self, expression: exp.Length) -> str: 868 arg = expression.this 869 870 # Dialects like BQ and Snowflake also accept binary values as args, so 871 # DDB will attempt to infer the type or resort to case/when resolution 872 if not expression.args.get("binary") or arg.is_string: 873 return self.func("LENGTH", arg) 874 875 if not arg.type: 876 from sqlglot.optimizer.annotate_types import annotate_types 877 878 arg = annotate_types(arg) 879 880 if arg.is_type(*exp.DataType.TEXT_TYPES): 881 return self.func("LENGTH", arg) 882 883 # We need these casts to make duckdb's static type checker happy 884 blob = exp.cast(arg, exp.DataType.Type.VARBINARY) 885 varchar = exp.cast(arg, exp.DataType.Type.VARCHAR) 886 887 case = ( 888 exp.case(self.func("TYPEOF", arg)) 889 .when( 890 "'VARCHAR'", exp.Anonymous(this="LENGTH", expressions=[varchar]) 891 ) # anonymous to break length_sql recursion 892 .when("'BLOB'", self.func("OCTET_LENGTH", blob)) 893 ) 894 895 return self.sql(case)
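A hedged illustration of the binary-aware path, reading BigQuery, where LENGTH also accepts BYTES (the column name is illustrative and the exact CASE shape may differ across sqlglot versions):

    import sqlglot

    # With an untyped argument, the generator falls back to TYPEOF-based dispatch.
    print(sqlglot.transpile("SELECT LENGTH(col) FROM t", read="bigquery", write="duckdb")[0])
    # e.g. SELECT CASE TYPEOF(col) WHEN 'VARCHAR' THEN LENGTH(CAST(col AS TEXT))
    #      WHEN 'BLOB' THEN OCTET_LENGTH(CAST(col AS BLOB)) END FROM t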
897 def objectinsert_sql(self, expression: exp.ObjectInsert) -> str: 898 this = expression.this 899 key = expression.args.get("key") 900 key_sql = key.name if isinstance(key, exp.Expression) else "" 901 value_sql = self.sql(expression, "value") 902 903 kv_sql = f"{key_sql} := {value_sql}" 904 905 # If the input struct is empty e.g. transpiling OBJECT_INSERT(OBJECT_CONSTRUCT(), key, value) from Snowflake 906 # then we can generate STRUCT_PACK which will build it since STRUCT_INSERT({}, key := value) is not valid DuckDB 907 if isinstance(this, exp.Struct) and not this.expressions: 908 return self.func("STRUCT_PACK", kv_sql) 909 910 return self.func("STRUCT_INSERT", this, kv_sql)
912 def unnest_sql(self, expression: exp.Unnest) -> str: 913 explode_array = expression.args.get("explode_array") 914 if explode_array: 915 # In BigQuery, UNNESTing a nested array leads to explosion of the top-level array & struct 916 # This is transpiled to DDB by transforming "FROM UNNEST(...)" to "FROM (SELECT UNNEST(..., max_depth => 2))" 917 expression.expressions.append( 918 exp.Kwarg(this=exp.var("max_depth"), expression=exp.Literal.number(2)) 919 ) 920 921 # If BQ's UNNEST is aliased, we transform it from a column alias to a table alias in DDB 922 alias = expression.args.get("alias") 923 if alias: 924 expression.set("alias", None) 925 alias = exp.TableAlias(this=seq_get(alias.args.get("columns"), 0)) 926 927 unnest_sql = super().unnest_sql(expression) 928 select = exp.Select(expressions=[unnest_sql]).subquery(alias) 929 return self.sql(select) 930 931 return super().unnest_sql(expression)
933 def ignorenulls_sql(self, expression: exp.IgnoreNulls) -> str: 934 if isinstance(expression.this, WINDOW_FUNCS_WITH_IGNORE_NULLS): 935 # DuckDB should render IGNORE NULLS only for the general-purpose 936 # window functions that accept it e.g. FIRST_VALUE(... IGNORE NULLS) OVER (...) 937 return super().ignorenulls_sql(expression) 938 939 return self.sql(expression, "this")
941 def arraytostring_sql(self, expression: exp.ArrayToString) -> str: 942 this = self.sql(expression, "this") 943 null_text = self.sql(expression, "null") 944 945 if null_text: 946 this = f"LIST_TRANSFORM({this}, x -> COALESCE(x, {null_text}))" 947 948 return self.func("ARRAY_TO_STRING", this, expression.expression)
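A short sketch of the null-text handling above, reading BigQuery's three-argument ARRAY_TO_STRING (identifiers are illustrative; output may vary by sqlglot version):

    import sqlglot

    # The null replacement is applied via LIST_TRANSFORM + COALESCE before joining.
    print(sqlglot.transpile(
        "SELECT ARRAY_TO_STRING(arr, ',', '?') FROM t",
        read="bigquery",
        write="duckdb",
    )[0])
    # e.g. SELECT ARRAY_TO_STRING(LIST_TRANSFORM(arr, x -> COALESCE(x, '?')), ',') FROM t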
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_ONLY_LITERALS
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_WITH_METHOD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- LIKE_PROPERTY_INSIDE_SCHEMA
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- SUPPORTS_EXPLODING_PROJECTIONS
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- trycast_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql