sqlglot.parser
from __future__ import annotations

import logging
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
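# Illustrative note (example values, not part of the module): the builders
# above turn flat argument lists into AST nodes. build_var_map, for instance,
# pairs alternating key/value arguments, so a call such as
#   build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
# would yield roughly exp.VarMap(keys=ARRAY('a'), values=ARRAY(1)).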
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(args: t.List, is_nvl: t.Optional[bool] = None) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl)


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
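# Illustrative sketch (the SHOW_PARSERS keys below are hypothetical): the
# metaclass precomputes word tries so multi-word keywords can be matched token
# by token. Assuming a dialect parser declared
#   SHOW_PARSERS = {"TABLES": ..., "TABLE STATUS": ...}
# its SHOW_TRIE would let "SHOW TABLE STATUS" match the two-word key before
# falling back to the single-word "TABLES" entry.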
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)),
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }
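    # Illustrative note: FUNCTIONS values are either builders over the raw
    # argument list or dialect-aware builders taking (args, dialect). Parsing
    # "LOG(2, 8)", for example, dispatches to build_logarithm above, which
    # orders base and expression according to dialect.LOG_BASE_FIRST.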
    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME64,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOMETRY,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.UNIQUEIDENTIFIER,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}
    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.MODEL,
        TokenType.SCHEMA,
        TokenType.SEQUENCE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
        TokenType.STREAMLIT,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END}

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.OFFSET,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}
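    # Illustrative note: the alias-token sets above are derived from
    # ID_VAR_TOKENS by subtraction. For example, TABLE_ALIAS_TOKENS drops the
    # join keywords so that in "SELECT * FROM t LEFT JOIN u ON ..." the LEFT
    # token starts a join clause instead of being read as an alias for t.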
    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
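    # Illustrative sketch: LAMBDAS above is keyed by the arrow tokens, so
    # parsing a higher-order call like "FILTER(l, x -> x > 0)" would produce an
    # exp.Lambda whose `expressions` hold the parameter list and whose `this`
    # is the parsed body.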
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DCOLON: lambda self, this, to: self.expression(
            exp.Cast if self.STRICT_CAST else exp.TryCast,
            this=this,
            to=to,
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.When: lambda self: seq_get(self._parse_when_matched(), 0),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        ),
        TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon),
    }
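    # Illustrative note: _parse_statement (defined further below) dispatches on
    # the first token of a statement via STATEMENT_PARSERS, e.g. a leading
    # CREATE token routes to _parse_create, while an unrecognized leading
    # keyword falls back to the generic command/expression paths.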
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.ID_VAR_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }
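    # Illustrative sketch: RANGE_PARSERS extend an already-parsed left-hand
    # side, so for "x LIKE 'a%' ESCAPE '!'" the binary_range_parser(exp.Like)
    # builder creates the Like node and _parse_escape then wraps it in
    # exp.Escape.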
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_duplicate(),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
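    # Illustrative note: while parsing DDL, each recognized property keyword is
    # looked up in PROPERTY_PARSERS above, e.g. the ENGINE in a ClickHouse-style
    # "CREATE TABLE t (x INT) ENGINE=MergeTree" triggers
    # _parse_property_assignment(exp.EngineProperty).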
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
    }

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "AS": lambda self: self._parse_select(),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
    }
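    # Illustrative note: "ALTER TABLE t ADD COLUMN c INT" is handled by the
    # "ADD" entry of ALTER_PARSERS above, and a second keyword after ALTER
    # (e.g. a Redshift-style "ALTER TABLE t ALTER DISTSTYLE ALL") is resolved
    # through ALTER_ALTER_PARSERS.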
"NEXT": lambda self: self._parse_next_value_for(), 1088 } 1089 1090 INVALID_FUNC_NAME_TOKENS = { 1091 TokenType.IDENTIFIER, 1092 TokenType.STRING, 1093 } 1094 1095 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1096 1097 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1098 1099 FUNCTION_PARSERS = { 1100 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1101 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1102 "DECODE": lambda self: self._parse_decode(), 1103 "EXTRACT": lambda self: self._parse_extract(), 1104 "GAP_FILL": lambda self: self._parse_gap_fill(), 1105 "JSON_OBJECT": lambda self: self._parse_json_object(), 1106 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1107 "JSON_TABLE": lambda self: self._parse_json_table(), 1108 "MATCH": lambda self: self._parse_match_against(), 1109 "NORMALIZE": lambda self: self._parse_normalize(), 1110 "OPENJSON": lambda self: self._parse_open_json(), 1111 "OVERLAY": lambda self: self._parse_overlay(), 1112 "POSITION": lambda self: self._parse_position(), 1113 "PREDICT": lambda self: self._parse_predict(), 1114 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1115 "STRING_AGG": lambda self: self._parse_string_agg(), 1116 "SUBSTRING": lambda self: self._parse_substring(), 1117 "TRIM": lambda self: self._parse_trim(), 1118 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1119 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1120 } 1121 1122 QUERY_MODIFIER_PARSERS = { 1123 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1124 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1125 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1126 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1127 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1128 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1129 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1130 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1131 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1132 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1133 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1134 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1135 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1136 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1137 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1138 TokenType.CLUSTER_BY: lambda self: ( 1139 "cluster", 1140 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1141 ), 1142 TokenType.DISTRIBUTE_BY: lambda self: ( 1143 "distribute", 1144 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1145 ), 1146 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1147 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1148 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1149 } 1150 1151 SET_PARSERS = { 1152 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1153 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1154 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1155 "TRANSACTION": lambda self: self._parse_set_transaction(), 1156 } 1157 1158 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1159 1160 
    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}
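    # Illustrative note: the OPTIONS_TYPE tables above map a leading keyword to
    # the word sequences allowed after it; e.g. CONFLICT_ACTIONS["DO"] accepts
    # both "ON CONFLICT DO NOTHING" and "ON CONFLICT DO UPDATE ...".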
    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether ADD is present for each column added by ALTER TABLE
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
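    # Illustrative sketch: dialect parsers tune the boolean flags in this block
    # by subclassing; a hypothetical dialect whose parser set
    # STRING_ALIASES = True would accept "SELECT COUNT(*) 'count'" instead of
    # rejecting the string alias.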
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
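    # Illustrative usage sketch (the "duckdb" name is just an example dialect):
    #   dialect = Dialect.get_or_raise("duckdb")
    #   sql = "SELECT 1; SELECT 2"
    #   trees = dialect.parser().parse(dialect.tokenizer.tokenize(sql), sql)
    # would return one syntax tree per semicolon-separated statement.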
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                if token.comments:
                    chunks.append([token])

                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        expressions = []

        for tokens in chunks:
            self._index = -1
            self._tokens = tokens
            self._advance()

            expressions.append(parse_method(self))

            if self._index < len(self._tokens):
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
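    # Illustrative note: with error_level=WARN the collected errors are only
    # logged; with RAISE, check_errors merges up to max_errors of them into a
    # single ParseError; with IMMEDIATE (the default), raise_error raises at
    # the first failure.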
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )
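    # Illustrative sketch of the backtracking idiom these helpers enable
    # (_parse_something is a hypothetical speculative parse):
    #   index = self._index
    #   node = self._parse_something()
    #   if node is None:
    #       self._retreat(index)  # rewind the token cursor and try another rule
    # _try_parse below packages this pattern together with error-level handling.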
    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(self) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            return self.STATEMENT_PARSERS[self._prev.token_type](self)

        if self._match_set(self.dialect.tokenizer.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)
self._prev 1708 temporary = self._match(TokenType.TEMPORARY) 1709 materialized = self._match_text_seq("MATERIALIZED") 1710 1711 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1712 if not kind: 1713 return self._parse_as_command(start) 1714 1715 concurrently = self._match_text_seq("CONCURRENTLY") 1716 if_exists = exists or self._parse_exists() 1717 table = self._parse_table_parts( 1718 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1719 ) 1720 1721 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1722 1723 if self._match(TokenType.L_PAREN, advance=False): 1724 expressions = self._parse_wrapped_csv(self._parse_types) 1725 else: 1726 expressions = None 1727 1728 return self.expression( 1729 exp.Drop, 1730 comments=start.comments, 1731 exists=if_exists, 1732 this=table, 1733 expressions=expressions, 1734 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1735 temporary=temporary, 1736 materialized=materialized, 1737 cascade=self._match_text_seq("CASCADE"), 1738 constraints=self._match_text_seq("CONSTRAINTS"), 1739 purge=self._match_text_seq("PURGE"), 1740 cluster=cluster, 1741 concurrently=concurrently, 1742 ) 1743 1744 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1745 return ( 1746 self._match_text_seq("IF") 1747 and (not not_ or self._match(TokenType.NOT)) 1748 and self._match(TokenType.EXISTS) 1749 ) 1750 1751 def _parse_create(self) -> exp.Create | exp.Command: 1752 # Note: this can't be None because we've matched a statement parser 1753 start = self._prev 1754 comments = self._prev_comments 1755 1756 replace = ( 1757 start.token_type == TokenType.REPLACE 1758 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1759 or self._match_pair(TokenType.OR, TokenType.ALTER) 1760 ) 1761 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1762 1763 unique = self._match(TokenType.UNIQUE) 1764 1765 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1766 clustered = True 1767 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1768 "COLUMNSTORE" 1769 ): 1770 clustered = False 1771 else: 1772 clustered = None 1773 1774 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1775 self._advance() 1776 1777 properties = None 1778 create_token = self._match_set(self.CREATABLES) and self._prev 1779 1780 if not create_token: 1781 # exp.Properties.Location.POST_CREATE 1782 properties = self._parse_properties() 1783 create_token = self._match_set(self.CREATABLES) and self._prev 1784 1785 if not properties or not create_token: 1786 return self._parse_as_command(start) 1787 1788 concurrently = self._match_text_seq("CONCURRENTLY") 1789 exists = self._parse_exists(not_=True) 1790 this = None 1791 expression: t.Optional[exp.Expression] = None 1792 indexes = None 1793 no_schema_binding = None 1794 begin = None 1795 end = None 1796 clone = None 1797 1798 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1799 nonlocal properties 1800 if properties and temp_props: 1801 properties.expressions.extend(temp_props.expressions) 1802 elif temp_props: 1803 properties = temp_props 1804 1805 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1806 this = self._parse_user_defined_function(kind=create_token.token_type) 1807 1808 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1809 extend_props(self._parse_properties()) 1810 1811 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1812 
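# Hedged round-trip sketch for _parse_drop above, assuming the default dialect:
import sqlglot
from sqlglot import exp

drop = sqlglot.parse_one("DROP TABLE IF EXISTS db.t CASCADE")
assert isinstance(drop, exp.Drop)
assert drop.args["exists"] and drop.args["cascade"] and drop.args["kind"] == "TABLE"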
extend_props(self._parse_properties()) 1813 1814 if not expression: 1815 if self._match(TokenType.COMMAND): 1816 expression = self._parse_as_command(self._prev) 1817 else: 1818 begin = self._match(TokenType.BEGIN) 1819 return_ = self._match_text_seq("RETURN") 1820 1821 if self._match(TokenType.STRING, advance=False): 1822 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1823 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1824 expression = self._parse_string() 1825 extend_props(self._parse_properties()) 1826 else: 1827 expression = self._parse_statement() 1828 1829 end = self._match_text_seq("END") 1830 1831 if return_: 1832 expression = self.expression(exp.Return, this=expression) 1833 elif create_token.token_type == TokenType.INDEX: 1834 # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c) 1835 if not self._match(TokenType.ON): 1836 index = self._parse_id_var() 1837 anonymous = False 1838 else: 1839 index = None 1840 anonymous = True 1841 1842 this = self._parse_index(index=index, anonymous=anonymous) 1843 elif create_token.token_type in self.DB_CREATABLES: 1844 table_parts = self._parse_table_parts( 1845 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1846 ) 1847 1848 # exp.Properties.Location.POST_NAME 1849 self._match(TokenType.COMMA) 1850 extend_props(self._parse_properties(before=True)) 1851 1852 this = self._parse_schema(this=table_parts) 1853 1854 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1855 extend_props(self._parse_properties()) 1856 1857 self._match(TokenType.ALIAS) 1858 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1859 # exp.Properties.Location.POST_ALIAS 1860 extend_props(self._parse_properties()) 1861 1862 if create_token.token_type == TokenType.SEQUENCE: 1863 expression = self._parse_types() 1864 extend_props(self._parse_properties()) 1865 else: 1866 expression = self._parse_ddl_select() 1867 1868 if create_token.token_type == TokenType.TABLE: 1869 # exp.Properties.Location.POST_EXPRESSION 1870 extend_props(self._parse_properties()) 1871 1872 indexes = [] 1873 while True: 1874 index = self._parse_index() 1875 1876 # exp.Properties.Location.POST_INDEX 1877 extend_props(self._parse_properties()) 1878 if not index: 1879 break 1880 else: 1881 self._match(TokenType.COMMA) 1882 indexes.append(index) 1883 elif create_token.token_type == TokenType.VIEW: 1884 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1885 no_schema_binding = True 1886 1887 shallow = self._match_text_seq("SHALLOW") 1888 1889 if self._match_texts(self.CLONE_KEYWORDS): 1890 copy = self._prev.text.lower() == "copy" 1891 clone = self.expression( 1892 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1893 ) 1894 1895 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1896 return self._parse_as_command(start) 1897 1898 create_kind_text = create_token.text.upper() 1899 return self.expression( 1900 exp.Create, 1901 comments=comments, 1902 this=this, 1903 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1904 replace=replace, 1905 refresh=refresh, 1906 unique=unique, 1907 expression=expression, 1908 exists=exists, 1909 properties=properties, 1910 indexes=indexes, 1911 no_schema_binding=no_schema_binding, 1912 begin=begin, 1913 end=end, 1914 clone=clone, 1915 concurrently=concurrently, 1916 clustered=clustered, 1917 ) 1918 1919 def 
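# Hedged sketch of _parse_create's output for a simple DDL statement (default dialect assumed):
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE TABLE IF NOT EXISTS t (a INT)")
assert isinstance(create, exp.Create) and create.args["kind"] == "TABLE"
assert create.args["exists"]  # IF NOT EXISTS sets the `exists` flag
assert isinstance(create.this, exp.Schema)  # table name wrapped together with its column defs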
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1920 seq = exp.SequenceProperties() 1921 1922 options = [] 1923 index = self._index 1924 1925 while self._curr: 1926 self._match(TokenType.COMMA) 1927 if self._match_text_seq("INCREMENT"): 1928 self._match_text_seq("BY") 1929 self._match_text_seq("=") 1930 seq.set("increment", self._parse_term()) 1931 elif self._match_text_seq("MINVALUE"): 1932 seq.set("minvalue", self._parse_term()) 1933 elif self._match_text_seq("MAXVALUE"): 1934 seq.set("maxvalue", self._parse_term()) 1935 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1936 self._match_text_seq("=") 1937 seq.set("start", self._parse_term()) 1938 elif self._match_text_seq("CACHE"): 1939 # T-SQL allows empty CACHE which is initialized dynamically 1940 seq.set("cache", self._parse_number() or True) 1941 elif self._match_text_seq("OWNED", "BY"): 1942 # "OWNED BY NONE" is the default 1943 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1944 else: 1945 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1946 if opt: 1947 options.append(opt) 1948 else: 1949 break 1950 1951 seq.set("options", options if options else None) 1952 return None if self._index == index else seq 1953 1954 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1955 # only used for teradata currently 1956 self._match(TokenType.COMMA) 1957 1958 kwargs = { 1959 "no": self._match_text_seq("NO"), 1960 "dual": self._match_text_seq("DUAL"), 1961 "before": self._match_text_seq("BEFORE"), 1962 "default": self._match_text_seq("DEFAULT"), 1963 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1964 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1965 "after": self._match_text_seq("AFTER"), 1966 "minimum": self._match_texts(("MIN", "MINIMUM")), 1967 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1968 } 1969 1970 if self._match_texts(self.PROPERTY_PARSERS): 1971 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1972 try: 1973 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1974 except TypeError: 1975 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1976 1977 return None 1978 1979 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1980 return self._parse_wrapped_csv(self._parse_property) 1981 1982 def _parse_property(self) -> t.Optional[exp.Expression]: 1983 if self._match_texts(self.PROPERTY_PARSERS): 1984 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1985 1986 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1987 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1988 1989 if self._match_text_seq("COMPOUND", "SORTKEY"): 1990 return self._parse_sortkey(compound=True) 1991 1992 if self._match_text_seq("SQL", "SECURITY"): 1993 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1994 1995 index = self._index 1996 key = self._parse_column() 1997 1998 if not self._match(TokenType.EQ): 1999 self._retreat(index) 2000 return self._parse_sequence_properties() 2001 2002 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2003 if isinstance(key, exp.Column): 2004 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2005 2006 value = self._parse_bitwise() or self._parse_var(any_token=True) 2007 2008 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2009 if 
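# Hedged sketch for _parse_sequence_properties; option coverage varies by dialect:
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE SEQUENCE seq START WITH 10 INCREMENT BY 2")
props = create.find(exp.SequenceProperties)
assert props is not None and props.args["start"] is not None and props.args["increment"] is not None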
isinstance(value, exp.Column): 2010 value = exp.var(value.name) 2011 2012 return self.expression(exp.Property, this=key, value=value) 2013 2014 def _parse_stored(self) -> exp.FileFormatProperty: 2015 self._match(TokenType.ALIAS) 2016 2017 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2018 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2019 2020 return self.expression( 2021 exp.FileFormatProperty, 2022 this=( 2023 self.expression( 2024 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2025 ) 2026 if input_format or output_format 2027 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2028 ), 2029 ) 2030 2031 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2032 field = self._parse_field() 2033 if isinstance(field, exp.Identifier) and not field.quoted: 2034 field = exp.var(field) 2035 2036 return field 2037 2038 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2039 self._match(TokenType.EQ) 2040 self._match(TokenType.ALIAS) 2041 2042 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2043 2044 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2045 properties = [] 2046 while True: 2047 if before: 2048 prop = self._parse_property_before() 2049 else: 2050 prop = self._parse_property() 2051 if not prop: 2052 break 2053 for p in ensure_list(prop): 2054 properties.append(p) 2055 2056 if properties: 2057 return self.expression(exp.Properties, expressions=properties) 2058 2059 return None 2060 2061 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2062 return self.expression( 2063 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2064 ) 2065 2066 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2067 if self._match_texts(("DEFINER", "INVOKER")): 2068 security_specifier = self._prev.text.upper() 2069 return self.expression(exp.SecurityProperty, this=security_specifier) 2070 return None 2071 2072 def _parse_settings_property(self) -> exp.SettingsProperty: 2073 return self.expression( 2074 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2075 ) 2076 2077 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2078 if self._index >= 2: 2079 pre_volatile_token = self._tokens[self._index - 2] 2080 else: 2081 pre_volatile_token = None 2082 2083 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2084 return exp.VolatileProperty() 2085 2086 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2087 2088 def _parse_retention_period(self) -> exp.Var: 2089 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2090 number = self._parse_number() 2091 number_str = f"{number} " if number else "" 2092 unit = self._parse_var(any_token=True) 2093 return exp.var(f"{number_str}{unit}") 2094 2095 def _parse_system_versioning_property( 2096 self, with_: bool = False 2097 ) -> exp.WithSystemVersioningProperty: 2098 self._match(TokenType.EQ) 2099 prop = self.expression( 2100 exp.WithSystemVersioningProperty, 2101 **{ # type: ignore 2102 "on": True, 2103 "with": with_, 2104 }, 2105 ) 2106 2107 if self._match_text_seq("OFF"): 2108 prop.set("on", False) 2109 return prop 2110 2111 self._match(TokenType.ON) 2112 if self._match(TokenType.L_PAREN): 2113 while self._curr and not 
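# Hedged example for _parse_stored above (Hive's STORED AS clause):
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE TABLE t (a INT) STORED AS PARQUET", read="hive")
file_format = create.find(exp.FileFormatProperty)
assert file_format is not None and file_format.name == "PARQUET"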
self._match(TokenType.R_PAREN): 2114 if self._match_text_seq("HISTORY_TABLE", "="): 2115 prop.set("this", self._parse_table_parts()) 2116 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2117 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2118 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2119 prop.set("retention_period", self._parse_retention_period()) 2120 2121 self._match(TokenType.COMMA) 2122 2123 return prop 2124 2125 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2126 self._match(TokenType.EQ) 2127 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2128 prop = self.expression(exp.DataDeletionProperty, on=on) 2129 2130 if self._match(TokenType.L_PAREN): 2131 while self._curr and not self._match(TokenType.R_PAREN): 2132 if self._match_text_seq("FILTER_COLUMN", "="): 2133 prop.set("filter_column", self._parse_column()) 2134 elif self._match_text_seq("RETENTION_PERIOD", "="): 2135 prop.set("retention_period", self._parse_retention_period()) 2136 2137 self._match(TokenType.COMMA) 2138 2139 return prop 2140 2141 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2142 kind = "HASH" 2143 expressions: t.Optional[t.List[exp.Expression]] = None 2144 if self._match_text_seq("BY", "HASH"): 2145 expressions = self._parse_wrapped_csv(self._parse_id_var) 2146 elif self._match_text_seq("BY", "RANDOM"): 2147 kind = "RANDOM" 2148 2149 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2150 buckets: t.Optional[exp.Expression] = None 2151 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2152 buckets = self._parse_number() 2153 2154 return self.expression( 2155 exp.DistributedByProperty, 2156 expressions=expressions, 2157 kind=kind, 2158 buckets=buckets, 2159 order=self._parse_order(), 2160 ) 2161 2162 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2163 self._match_text_seq("KEY") 2164 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2165 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2166 2167 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2168 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2169 prop = self._parse_system_versioning_property(with_=True) 2170 self._match_r_paren() 2171 return prop 2172 2173 if self._match(TokenType.L_PAREN, advance=False): 2174 return self._parse_wrapped_properties() 2175 2176 if self._match_text_seq("JOURNAL"): 2177 return self._parse_withjournaltable() 2178 2179 if self._match_texts(self.VIEW_ATTRIBUTES): 2180 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2181 2182 if self._match_text_seq("DATA"): 2183 return self._parse_withdata(no=False) 2184 elif self._match_text_seq("NO", "DATA"): 2185 return self._parse_withdata(no=True) 2186 2187 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2188 return self._parse_serde_properties(with_=True) 2189 2190 if self._match(TokenType.SCHEMA): 2191 return self.expression( 2192 exp.WithSchemaBindingProperty, 2193 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2194 ) 2195 2196 if not self._next: 2197 return None 2198 2199 return self._parse_withisolatedloading() 2200 2201 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2202 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2203 self._match(TokenType.EQ) 2204 2205 user = self._parse_id_var() 2206 self._match(TokenType.PARAMETER) 2207 host = 
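# Hedged T-SQL sketch for _parse_system_versioning_property; exact option support may vary:
import sqlglot
from sqlglot import exp

sql = "CREATE TABLE t (a INT) WITH (SYSTEM_VERSIONING = ON (HISTORY_TABLE = dbo.h))"
prop = sqlglot.parse_one(sql, read="tsql").find(exp.WithSystemVersioningProperty)
assert prop is not None and prop.args["on"] is True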
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2208 2209 if not user or not host: 2210 return None 2211 2212 return exp.DefinerProperty(this=f"{user}@{host}") 2213 2214 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2215 self._match(TokenType.TABLE) 2216 self._match(TokenType.EQ) 2217 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2218 2219 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2220 return self.expression(exp.LogProperty, no=no) 2221 2222 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2223 return self.expression(exp.JournalProperty, **kwargs) 2224 2225 def _parse_checksum(self) -> exp.ChecksumProperty: 2226 self._match(TokenType.EQ) 2227 2228 on = None 2229 if self._match(TokenType.ON): 2230 on = True 2231 elif self._match_text_seq("OFF"): 2232 on = False 2233 2234 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2235 2236 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2237 return self.expression( 2238 exp.Cluster, 2239 expressions=( 2240 self._parse_wrapped_csv(self._parse_ordered) 2241 if wrapped 2242 else self._parse_csv(self._parse_ordered) 2243 ), 2244 ) 2245 2246 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2247 self._match_text_seq("BY") 2248 2249 self._match_l_paren() 2250 expressions = self._parse_csv(self._parse_column) 2251 self._match_r_paren() 2252 2253 if self._match_text_seq("SORTED", "BY"): 2254 self._match_l_paren() 2255 sorted_by = self._parse_csv(self._parse_ordered) 2256 self._match_r_paren() 2257 else: 2258 sorted_by = None 2259 2260 self._match(TokenType.INTO) 2261 buckets = self._parse_number() 2262 self._match_text_seq("BUCKETS") 2263 2264 return self.expression( 2265 exp.ClusteredByProperty, 2266 expressions=expressions, 2267 sorted_by=sorted_by, 2268 buckets=buckets, 2269 ) 2270 2271 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2272 if not self._match_text_seq("GRANTS"): 2273 self._retreat(self._index - 1) 2274 return None 2275 2276 return self.expression(exp.CopyGrantsProperty) 2277 2278 def _parse_freespace(self) -> exp.FreespaceProperty: 2279 self._match(TokenType.EQ) 2280 return self.expression( 2281 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2282 ) 2283 2284 def _parse_mergeblockratio( 2285 self, no: bool = False, default: bool = False 2286 ) -> exp.MergeBlockRatioProperty: 2287 if self._match(TokenType.EQ): 2288 return self.expression( 2289 exp.MergeBlockRatioProperty, 2290 this=self._parse_number(), 2291 percent=self._match(TokenType.PERCENT), 2292 ) 2293 2294 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2295 2296 def _parse_datablocksize( 2297 self, 2298 default: t.Optional[bool] = None, 2299 minimum: t.Optional[bool] = None, 2300 maximum: t.Optional[bool] = None, 2301 ) -> exp.DataBlocksizeProperty: 2302 self._match(TokenType.EQ) 2303 size = self._parse_number() 2304 2305 units = None 2306 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2307 units = self._prev.text 2308 2309 return self.expression( 2310 exp.DataBlocksizeProperty, 2311 size=size, 2312 units=units, 2313 default=default, 2314 minimum=minimum, 2315 maximum=maximum, 2316 ) 2317 2318 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2319 self._match(TokenType.EQ) 2320 always = self._match_text_seq("ALWAYS") 2321 manual = self._match_text_seq("MANUAL") 2322 never = 
self._match_text_seq("NEVER") 2323 default = self._match_text_seq("DEFAULT") 2324 2325 autotemp = None 2326 if self._match_text_seq("AUTOTEMP"): 2327 autotemp = self._parse_schema() 2328 2329 return self.expression( 2330 exp.BlockCompressionProperty, 2331 always=always, 2332 manual=manual, 2333 never=never, 2334 default=default, 2335 autotemp=autotemp, 2336 ) 2337 2338 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2339 index = self._index 2340 no = self._match_text_seq("NO") 2341 concurrent = self._match_text_seq("CONCURRENT") 2342 2343 if not self._match_text_seq("ISOLATED", "LOADING"): 2344 self._retreat(index) 2345 return None 2346 2347 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2348 return self.expression( 2349 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2350 ) 2351 2352 def _parse_locking(self) -> exp.LockingProperty: 2353 if self._match(TokenType.TABLE): 2354 kind = "TABLE" 2355 elif self._match(TokenType.VIEW): 2356 kind = "VIEW" 2357 elif self._match(TokenType.ROW): 2358 kind = "ROW" 2359 elif self._match_text_seq("DATABASE"): 2360 kind = "DATABASE" 2361 else: 2362 kind = None 2363 2364 if kind in ("DATABASE", "TABLE", "VIEW"): 2365 this = self._parse_table_parts() 2366 else: 2367 this = None 2368 2369 if self._match(TokenType.FOR): 2370 for_or_in = "FOR" 2371 elif self._match(TokenType.IN): 2372 for_or_in = "IN" 2373 else: 2374 for_or_in = None 2375 2376 if self._match_text_seq("ACCESS"): 2377 lock_type = "ACCESS" 2378 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2379 lock_type = "EXCLUSIVE" 2380 elif self._match_text_seq("SHARE"): 2381 lock_type = "SHARE" 2382 elif self._match_text_seq("READ"): 2383 lock_type = "READ" 2384 elif self._match_text_seq("WRITE"): 2385 lock_type = "WRITE" 2386 elif self._match_text_seq("CHECKSUM"): 2387 lock_type = "CHECKSUM" 2388 else: 2389 lock_type = None 2390 2391 override = self._match_text_seq("OVERRIDE") 2392 2393 return self.expression( 2394 exp.LockingProperty, 2395 this=this, 2396 kind=kind, 2397 for_or_in=for_or_in, 2398 lock_type=lock_type, 2399 override=override, 2400 ) 2401 2402 def _parse_partition_by(self) -> t.List[exp.Expression]: 2403 if self._match(TokenType.PARTITION_BY): 2404 return self._parse_csv(self._parse_assignment) 2405 return [] 2406 2407 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2408 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2409 if self._match_text_seq("MINVALUE"): 2410 return exp.var("MINVALUE") 2411 if self._match_text_seq("MAXVALUE"): 2412 return exp.var("MAXVALUE") 2413 return self._parse_bitwise() 2414 2415 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2416 expression = None 2417 from_expressions = None 2418 to_expressions = None 2419 2420 if self._match(TokenType.IN): 2421 this = self._parse_wrapped_csv(self._parse_bitwise) 2422 elif self._match(TokenType.FROM): 2423 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2424 self._match_text_seq("TO") 2425 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2426 elif self._match_text_seq("WITH", "(", "MODULUS"): 2427 this = self._parse_number() 2428 self._match_text_seq(",", "REMAINDER") 2429 expression = self._parse_number() 2430 self._match_r_paren() 2431 else: 2432 self.raise_error("Failed to parse partition bound spec.") 2433 2434 return self.expression( 2435 exp.PartitionBoundSpec, 2436 this=this, 2437 expression=expression, 2438 
from_expressions=from_expressions, 2439 to_expressions=to_expressions, 2440 ) 2441 2442 # https://www.postgresql.org/docs/current/sql-createtable.html 2443 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2444 if not self._match_text_seq("OF"): 2445 self._retreat(self._index - 1) 2446 return None 2447 2448 this = self._parse_table(schema=True) 2449 2450 if self._match(TokenType.DEFAULT): 2451 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2452 elif self._match_text_seq("FOR", "VALUES"): 2453 expression = self._parse_partition_bound_spec() 2454 else: 2455 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2456 2457 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2458 2459 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2460 self._match(TokenType.EQ) 2461 return self.expression( 2462 exp.PartitionedByProperty, 2463 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2464 ) 2465 2466 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2467 if self._match_text_seq("AND", "STATISTICS"): 2468 statistics = True 2469 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2470 statistics = False 2471 else: 2472 statistics = None 2473 2474 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2475 2476 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2477 if self._match_text_seq("SQL"): 2478 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2479 return None 2480 2481 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2482 if self._match_text_seq("SQL", "DATA"): 2483 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2484 return None 2485 2486 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2487 if self._match_text_seq("PRIMARY", "INDEX"): 2488 return exp.NoPrimaryIndexProperty() 2489 if self._match_text_seq("SQL"): 2490 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2491 return None 2492 2493 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2494 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2495 return exp.OnCommitProperty() 2496 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2497 return exp.OnCommitProperty(delete=True) 2498 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2499 2500 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2501 if self._match_text_seq("SQL", "DATA"): 2502 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2503 return None 2504 2505 def _parse_distkey(self) -> exp.DistKeyProperty: 2506 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2507 2508 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2509 table = self._parse_table(schema=True) 2510 2511 options = [] 2512 while self._match_texts(("INCLUDING", "EXCLUDING")): 2513 this = self._prev.text.upper() 2514 2515 id_var = self._parse_id_var() 2516 if not id_var: 2517 return None 2518 2519 options.append( 2520 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2521 ) 2522 2523 return self.expression(exp.LikeProperty, this=table, expressions=options) 2524 2525 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2526 return self.expression( 2527 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2528 ) 2529 2530 def 
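# Hedged Postgres sketch for _parse_partitioned_of and the partition bound spec above:
import sqlglot
from sqlglot import exp

sql = "CREATE TABLE p1 PARTITION OF t FOR VALUES FROM (MINVALUE) TO (10)"
part = sqlglot.parse_one(sql, read="postgres").find(exp.PartitionedOfProperty)
assert part is not None and isinstance(part.args["expression"], exp.PartitionBoundSpec)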
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2531 self._match(TokenType.EQ) 2532 return self.expression( 2533 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2534 ) 2535 2536 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2537 self._match_text_seq("WITH", "CONNECTION") 2538 return self.expression( 2539 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2540 ) 2541 2542 def _parse_returns(self) -> exp.ReturnsProperty: 2543 value: t.Optional[exp.Expression] 2544 null = None 2545 is_table = self._match(TokenType.TABLE) 2546 2547 if is_table: 2548 if self._match(TokenType.LT): 2549 value = self.expression( 2550 exp.Schema, 2551 this="TABLE", 2552 expressions=self._parse_csv(self._parse_struct_types), 2553 ) 2554 if not self._match(TokenType.GT): 2555 self.raise_error("Expecting >") 2556 else: 2557 value = self._parse_schema(exp.var("TABLE")) 2558 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2559 null = True 2560 value = None 2561 else: 2562 value = self._parse_types() 2563 2564 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2565 2566 def _parse_describe(self) -> exp.Describe: 2567 kind = self._match_set(self.CREATABLES) and self._prev.text 2568 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2569 if self._match(TokenType.DOT): 2570 style = None 2571 self._retreat(self._index - 2) 2572 this = self._parse_table(schema=True) 2573 properties = self._parse_properties() 2574 expressions = properties.expressions if properties else None 2575 partition = self._parse_partition() 2576 return self.expression( 2577 exp.Describe, 2578 this=this, 2579 style=style, 2580 kind=kind, 2581 expressions=expressions, 2582 partition=partition, 2583 ) 2584 2585 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2586 kind = self._prev.text.upper() 2587 expressions = [] 2588 2589 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2590 if self._match(TokenType.WHEN): 2591 expression = self._parse_disjunction() 2592 self._match(TokenType.THEN) 2593 else: 2594 expression = None 2595 2596 else_ = self._match(TokenType.ELSE) 2597 2598 if not self._match(TokenType.INTO): 2599 return None 2600 2601 return self.expression( 2602 exp.ConditionalInsert, 2603 this=self.expression( 2604 exp.Insert, 2605 this=self._parse_table(schema=True), 2606 expression=self._parse_derived_table_values(), 2607 ), 2608 expression=expression, 2609 else_=else_, 2610 ) 2611 2612 expression = parse_conditional_insert() 2613 while expression is not None: 2614 expressions.append(expression) 2615 expression = parse_conditional_insert() 2616 2617 return self.expression( 2618 exp.MultitableInserts, 2619 kind=kind, 2620 comments=comments, 2621 expressions=expressions, 2622 source=self._parse_table(), 2623 ) 2624 2625 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2626 comments = ensure_list(self._prev_comments) 2627 hint = self._parse_hint() 2628 overwrite = self._match(TokenType.OVERWRITE) 2629 ignore = self._match(TokenType.IGNORE) 2630 local = self._match_text_seq("LOCAL") 2631 alternative = None 2632 is_function = None 2633 2634 if self._match_text_seq("DIRECTORY"): 2635 this: t.Optional[exp.Expression] = self.expression( 2636 exp.Directory, 2637 this=self._parse_var_or_string(), 2638 local=local, 2639 row_format=self._parse_row_format(match_row=True), 2640 ) 
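# Hedged Oracle sketch for _parse_multitable_inserts (INSERT ALL / INSERT FIRST):
import sqlglot
from sqlglot import exp

sql = "INSERT ALL WHEN x > 0 THEN INTO t1 VALUES (x) ELSE INTO t2 VALUES (x) SELECT x FROM src"
multi = sqlglot.parse_one(sql, read="oracle")
assert isinstance(multi, exp.MultitableInserts) and len(multi.expressions) == 2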
2641 else: 2642 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2643 comments += ensure_list(self._prev_comments) 2644 return self._parse_multitable_inserts(comments) 2645 2646 if self._match(TokenType.OR): 2647 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2648 2649 self._match(TokenType.INTO) 2650 comments += ensure_list(self._prev_comments) 2651 self._match(TokenType.TABLE) 2652 is_function = self._match(TokenType.FUNCTION) 2653 2654 this = ( 2655 self._parse_table(schema=True, parse_partition=True) 2656 if not is_function 2657 else self._parse_function() 2658 ) 2659 2660 returning = self._parse_returning() 2661 2662 return self.expression( 2663 exp.Insert, 2664 comments=comments, 2665 hint=hint, 2666 is_function=is_function, 2667 this=this, 2668 stored=self._match_text_seq("STORED") and self._parse_stored(), 2669 by_name=self._match_text_seq("BY", "NAME"), 2670 exists=self._parse_exists(), 2671 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2672 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2673 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2674 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2675 conflict=self._parse_on_conflict(), 2676 returning=returning or self._parse_returning(), 2677 overwrite=overwrite, 2678 alternative=alternative, 2679 ignore=ignore, 2680 source=self._match(TokenType.TABLE) and self._parse_table(), 2681 ) 2682 2683 def _parse_kill(self) -> exp.Kill: 2684 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2685 2686 return self.expression( 2687 exp.Kill, 2688 this=self._parse_primary(), 2689 kind=kind, 2690 ) 2691 2692 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2693 conflict = self._match_text_seq("ON", "CONFLICT") 2694 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2695 2696 if not conflict and not duplicate: 2697 return None 2698 2699 conflict_keys = None 2700 constraint = None 2701 2702 if conflict: 2703 if self._match_text_seq("ON", "CONSTRAINT"): 2704 constraint = self._parse_id_var() 2705 elif self._match(TokenType.L_PAREN): 2706 conflict_keys = self._parse_csv(self._parse_id_var) 2707 self._match_r_paren() 2708 2709 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2710 if self._prev.token_type == TokenType.UPDATE: 2711 self._match(TokenType.SET) 2712 expressions = self._parse_csv(self._parse_equality) 2713 else: 2714 expressions = None 2715 2716 return self.expression( 2717 exp.OnConflict, 2718 duplicate=duplicate, 2719 expressions=expressions, 2720 action=action, 2721 conflict_keys=conflict_keys, 2722 constraint=constraint, 2723 ) 2724 2725 def _parse_returning(self) -> t.Optional[exp.Returning]: 2726 if not self._match(TokenType.RETURNING): 2727 return None 2728 return self.expression( 2729 exp.Returning, 2730 expressions=self._parse_csv(self._parse_expression), 2731 into=self._match(TokenType.INTO) and self._parse_table_part(), 2732 ) 2733 2734 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2735 if not self._match(TokenType.FORMAT): 2736 return None 2737 return self._parse_row_format() 2738 2739 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2740 index = self._index 2741 with_ = with_ or self._match_text_seq("WITH") 2742 2743 if not self._match(TokenType.SERDE_PROPERTIES): 2744 self._retreat(index) 2745 return 
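# Hedged Postgres sketch for _parse_on_conflict:
import sqlglot
from sqlglot import exp

sql = "INSERT INTO t (a) VALUES (1) ON CONFLICT (a) DO NOTHING"
conflict = sqlglot.parse_one(sql, read="postgres").args["conflict"]
assert isinstance(conflict, exp.OnConflict)
assert [k.name for k in conflict.args["conflict_keys"]] == ["a"]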
None 2746 return self.expression( 2747 exp.SerdeProperties, 2748 **{ # type: ignore 2749 "expressions": self._parse_wrapped_properties(), 2750 "with": with_, 2751 }, 2752 ) 2753 2754 def _parse_row_format( 2755 self, match_row: bool = False 2756 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2757 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2758 return None 2759 2760 if self._match_text_seq("SERDE"): 2761 this = self._parse_string() 2762 2763 serde_properties = self._parse_serde_properties() 2764 2765 return self.expression( 2766 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2767 ) 2768 2769 self._match_text_seq("DELIMITED") 2770 2771 kwargs = {} 2772 2773 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2774 kwargs["fields"] = self._parse_string() 2775 if self._match_text_seq("ESCAPED", "BY"): 2776 kwargs["escaped"] = self._parse_string() 2777 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2778 kwargs["collection_items"] = self._parse_string() 2779 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2780 kwargs["map_keys"] = self._parse_string() 2781 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2782 kwargs["lines"] = self._parse_string() 2783 if self._match_text_seq("NULL", "DEFINED", "AS"): 2784 kwargs["null"] = self._parse_string() 2785 2786 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2787 2788 def _parse_load(self) -> exp.LoadData | exp.Command: 2789 if self._match_text_seq("DATA"): 2790 local = self._match_text_seq("LOCAL") 2791 self._match_text_seq("INPATH") 2792 inpath = self._parse_string() 2793 overwrite = self._match(TokenType.OVERWRITE) 2794 self._match_pair(TokenType.INTO, TokenType.TABLE) 2795 2796 return self.expression( 2797 exp.LoadData, 2798 this=self._parse_table(schema=True), 2799 local=local, 2800 overwrite=overwrite, 2801 inpath=inpath, 2802 partition=self._parse_partition(), 2803 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2804 serde=self._match_text_seq("SERDE") and self._parse_string(), 2805 ) 2806 return self._parse_as_command(self._prev) 2807 2808 def _parse_delete(self) -> exp.Delete: 2809 # This handles MySQL's "Multiple-Table Syntax" 2810 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2811 tables = None 2812 comments = self._prev_comments 2813 if not self._match(TokenType.FROM, advance=False): 2814 tables = self._parse_csv(self._parse_table) or None 2815 2816 returning = self._parse_returning() 2817 2818 return self.expression( 2819 exp.Delete, 2820 comments=comments, 2821 tables=tables, 2822 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2823 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2824 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2825 where=self._parse_where(), 2826 returning=returning or self._parse_returning(), 2827 limit=self._parse_limit(), 2828 ) 2829 2830 def _parse_update(self) -> exp.Update: 2831 comments = self._prev_comments 2832 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2833 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2834 returning = self._parse_returning() 2835 return self.expression( 2836 exp.Update, 2837 comments=comments, 2838 **{ # type: ignore 2839 "this": this, 2840 "expressions": expressions, 2841 "from": self._parse_from(joins=True), 2842 "where": self._parse_where(), 2843 "returning": returning or 
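# Hedged Hive sketch for _parse_row_format's DELIMITED branch:
import sqlglot
from sqlglot import exp

sql = "CREATE TABLE t (a INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','"
fmt = sqlglot.parse_one(sql, read="hive").find(exp.RowFormatDelimitedProperty)
assert fmt is not None and fmt.args["fields"].this == ","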
self._parse_returning(), 2844 "order": self._parse_order(), 2845 "limit": self._parse_limit(), 2846 }, 2847 ) 2848 2849 def _parse_uncache(self) -> exp.Uncache: 2850 if not self._match(TokenType.TABLE): 2851 self.raise_error("Expecting TABLE after UNCACHE") 2852 2853 return self.expression( 2854 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2855 ) 2856 2857 def _parse_cache(self) -> exp.Cache: 2858 lazy = self._match_text_seq("LAZY") 2859 self._match(TokenType.TABLE) 2860 table = self._parse_table(schema=True) 2861 2862 options = [] 2863 if self._match_text_seq("OPTIONS"): 2864 self._match_l_paren() 2865 k = self._parse_string() 2866 self._match(TokenType.EQ) 2867 v = self._parse_string() 2868 options = [k, v] 2869 self._match_r_paren() 2870 2871 self._match(TokenType.ALIAS) 2872 return self.expression( 2873 exp.Cache, 2874 this=table, 2875 lazy=lazy, 2876 options=options, 2877 expression=self._parse_select(nested=True), 2878 ) 2879 2880 def _parse_partition(self) -> t.Optional[exp.Partition]: 2881 if not self._match(TokenType.PARTITION): 2882 return None 2883 2884 return self.expression( 2885 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2886 ) 2887 2888 def _parse_value(self) -> t.Optional[exp.Tuple]: 2889 if self._match(TokenType.L_PAREN): 2890 expressions = self._parse_csv(self._parse_expression) 2891 self._match_r_paren() 2892 return self.expression(exp.Tuple, expressions=expressions) 2893 2894 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2895 expression = self._parse_expression() 2896 if expression: 2897 return self.expression(exp.Tuple, expressions=[expression]) 2898 return None 2899 2900 def _parse_projections(self) -> t.List[exp.Expression]: 2901 return self._parse_expressions() 2902 2903 def _parse_select( 2904 self, 2905 nested: bool = False, 2906 table: bool = False, 2907 parse_subquery_alias: bool = True, 2908 parse_set_operation: bool = True, 2909 ) -> t.Optional[exp.Expression]: 2910 cte = self._parse_with() 2911 2912 if cte: 2913 this = self._parse_statement() 2914 2915 if not this: 2916 self.raise_error("Failed to parse any statement following CTE") 2917 return cte 2918 2919 if "with" in this.arg_types: 2920 this.set("with", cte) 2921 else: 2922 self.raise_error(f"{this.key} does not support CTE") 2923 this = cte 2924 2925 return this 2926 2927 # duckdb supports leading with FROM x 2928 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2929 2930 if self._match(TokenType.SELECT): 2931 comments = self._prev_comments 2932 2933 hint = self._parse_hint() 2934 2935 if self._next and not self._next.token_type == TokenType.DOT: 2936 all_ = self._match(TokenType.ALL) 2937 distinct = self._match_set(self.DISTINCT_TOKENS) 2938 else: 2939 all_, distinct = None, None 2940 2941 kind = ( 2942 self._match(TokenType.ALIAS) 2943 and self._match_texts(("STRUCT", "VALUE")) 2944 and self._prev.text.upper() 2945 ) 2946 2947 if distinct: 2948 distinct = self.expression( 2949 exp.Distinct, 2950 on=self._parse_value() if self._match(TokenType.ON) else None, 2951 ) 2952 2953 if all_ and distinct: 2954 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2955 2956 limit = self._parse_limit(top=True) 2957 projections = self._parse_projections() 2958 2959 this = self.expression( 2960 exp.Select, 2961 kind=kind, 2962 hint=hint, 2963 distinct=distinct, 2964 expressions=projections, 2965 limit=limit, 2966 ) 2967 this.comments = comments 2968 2969 into = 
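# Hedged sketch for _parse_value / derived-table VALUES (default dialect):
import sqlglot
from sqlglot import exp

values = sqlglot.parse_one("SELECT * FROM (VALUES (1, 2), (3, 4)) AS t(a, b)").find(exp.Values)
assert values is not None and len(values.expressions) == 2  # two exp.Tuple rows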
self._parse_into() 2970 if into: 2971 this.set("into", into) 2972 2973 if not from_: 2974 from_ = self._parse_from() 2975 2976 if from_: 2977 this.set("from", from_) 2978 2979 this = self._parse_query_modifiers(this) 2980 elif (table or nested) and self._match(TokenType.L_PAREN): 2981 if self._match(TokenType.PIVOT): 2982 this = self._parse_simplified_pivot() 2983 elif self._match(TokenType.FROM): 2984 this = exp.select("*").from_( 2985 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2986 ) 2987 else: 2988 this = ( 2989 self._parse_table() 2990 if table 2991 else self._parse_select(nested=True, parse_set_operation=False) 2992 ) 2993 2994 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 2995 # in case a modifier (e.g. join) is following 2996 if table and isinstance(this, exp.Values) and this.alias: 2997 alias = this.args["alias"].pop() 2998 this = exp.Table(this=this, alias=alias) 2999 3000 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3001 3002 self._match_r_paren() 3003 3004 # We return early here so that the UNION isn't attached to the subquery by the 3005 # following call to _parse_set_operations, but instead becomes the parent node 3006 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3007 elif self._match(TokenType.VALUES, advance=False): 3008 this = self._parse_derived_table_values() 3009 elif from_: 3010 this = exp.select("*").from_(from_.this, copy=False) 3011 elif self._match(TokenType.SUMMARIZE): 3012 table = self._match(TokenType.TABLE) 3013 this = self._parse_select() or self._parse_string() or self._parse_table() 3014 return self.expression(exp.Summarize, this=this, table=table) 3015 elif self._match(TokenType.DESCRIBE): 3016 this = self._parse_describe() 3017 elif self._match_text_seq("STREAM"): 3018 this = self.expression(exp.Stream, this=self._parse_function()) 3019 else: 3020 this = None 3021 3022 return self._parse_set_operations(this) if parse_set_operation else this 3023 3024 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3025 if not skip_with_token and not self._match(TokenType.WITH): 3026 return None 3027 3028 comments = self._prev_comments 3029 recursive = self._match(TokenType.RECURSIVE) 3030 3031 expressions = [] 3032 while True: 3033 expressions.append(self._parse_cte()) 3034 3035 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3036 break 3037 else: 3038 self._match(TokenType.WITH) 3039 3040 return self.expression( 3041 exp.With, comments=comments, expressions=expressions, recursive=recursive 3042 ) 3043 3044 def _parse_cte(self) -> exp.CTE: 3045 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3046 if not alias or not alias.this: 3047 self.raise_error("Expected CTE to have alias") 3048 3049 self._match(TokenType.ALIAS) 3050 comments = self._prev_comments 3051 3052 if self._match_text_seq("NOT", "MATERIALIZED"): 3053 materialized = False 3054 elif self._match_text_seq("MATERIALIZED"): 3055 materialized = True 3056 else: 3057 materialized = None 3058 3059 return self.expression( 3060 exp.CTE, 3061 this=self._parse_wrapped(self._parse_statement), 3062 alias=alias, 3063 materialized=materialized, 3064 comments=comments, 3065 ) 3066 3067 def _parse_table_alias( 3068 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3069 ) -> t.Optional[exp.TableAlias]: 3070 any_token = self._match(TokenType.ALIAS) 3071 alias = ( 3072 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3073 or 
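# Hedged sketch for _parse_with / _parse_cte:
import sqlglot
from sqlglot import exp

query = sqlglot.parse_one("WITH c AS (SELECT 1 AS x) SELECT x FROM c")
with_ = query.args["with"]
assert isinstance(with_, exp.With) and with_.expressions[0].alias == "c"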
self._parse_string_as_identifier() 3074 ) 3075 3076 index = self._index 3077 if self._match(TokenType.L_PAREN): 3078 columns = self._parse_csv(self._parse_function_parameter) 3079 self._match_r_paren() if columns else self._retreat(index) 3080 else: 3081 columns = None 3082 3083 if not alias and not columns: 3084 return None 3085 3086 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3087 3088 # We bubble up comments from the Identifier to the TableAlias 3089 if isinstance(alias, exp.Identifier): 3090 table_alias.add_comments(alias.pop_comments()) 3091 3092 return table_alias 3093 3094 def _parse_subquery( 3095 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3096 ) -> t.Optional[exp.Subquery]: 3097 if not this: 3098 return None 3099 3100 return self.expression( 3101 exp.Subquery, 3102 this=this, 3103 pivots=self._parse_pivots(), 3104 alias=self._parse_table_alias() if parse_alias else None, 3105 sample=self._parse_table_sample(), 3106 ) 3107 3108 def _implicit_unnests_to_explicit(self, this: E) -> E: 3109 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3110 3111 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3112 for i, join in enumerate(this.args.get("joins") or []): 3113 table = join.this 3114 normalized_table = table.copy() 3115 normalized_table.meta["maybe_column"] = True 3116 normalized_table = _norm(normalized_table, dialect=self.dialect) 3117 3118 if isinstance(table, exp.Table) and not join.args.get("on"): 3119 if normalized_table.parts[0].name in refs: 3120 table_as_column = table.to_column() 3121 unnest = exp.Unnest(expressions=[table_as_column]) 3122 3123 # Table.to_column creates a parent Alias node that we want to convert to 3124 # a TableAlias and attach to the Unnest, so it matches the parser's output 3125 if isinstance(table.args.get("alias"), exp.TableAlias): 3126 table_as_column.replace(table_as_column.this) 3127 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3128 3129 table.replace(unnest) 3130 3131 refs.add(normalized_table.alias_or_name) 3132 3133 return this 3134 3135 def _parse_query_modifiers( 3136 self, this: t.Optional[exp.Expression] 3137 ) -> t.Optional[exp.Expression]: 3138 if isinstance(this, (exp.Query, exp.Table)): 3139 for join in self._parse_joins(): 3140 this.append("joins", join) 3141 for lateral in iter(self._parse_lateral, None): 3142 this.append("laterals", lateral) 3143 3144 while True: 3145 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3146 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3147 key, expression = parser(self) 3148 3149 if expression: 3150 this.set(key, expression) 3151 if key == "limit": 3152 offset = expression.args.pop("offset", None) 3153 3154 if offset: 3155 offset = exp.Offset(expression=offset) 3156 this.set("offset", offset) 3157 3158 limit_by_expressions = expression.expressions 3159 expression.set("expressions", None) 3160 offset.set("expressions", limit_by_expressions) 3161 continue 3162 break 3163 3164 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3165 this = self._implicit_unnests_to_explicit(this) 3166 3167 return this 3168 3169 def _parse_hint(self) -> t.Optional[exp.Hint]: 3170 if self._match(TokenType.HINT): 3171 hints = [] 3172 for hint in iter( 3173 lambda: self._parse_csv( 3174 lambda: self._parse_function() or self._parse_var(upper=True) 3175 ), 3176 [], 3177 ): 3178 hints.extend(hint) 3179 3180 if not 
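# Hedged BigQuery sketch for _implicit_unnests_to_explicit; the correlated
# comma join is expected to be rewritten into an explicit UNNEST:
import sqlglot

out = sqlglot.transpile("SELECT * FROM t, t.arr", read="bigquery", write="bigquery")[0]
# e.g. something along the lines of: SELECT * FROM t, UNNEST(t.arr)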
self._match_pair(TokenType.STAR, TokenType.SLASH): 3181 self.raise_error("Expected */ after HINT") 3182 3183 return self.expression(exp.Hint, expressions=hints) 3184 3185 return None 3186 3187 def _parse_into(self) -> t.Optional[exp.Into]: 3188 if not self._match(TokenType.INTO): 3189 return None 3190 3191 temp = self._match(TokenType.TEMPORARY) 3192 unlogged = self._match_text_seq("UNLOGGED") 3193 self._match(TokenType.TABLE) 3194 3195 return self.expression( 3196 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3197 ) 3198 3199 def _parse_from( 3200 self, joins: bool = False, skip_from_token: bool = False 3201 ) -> t.Optional[exp.From]: 3202 if not skip_from_token and not self._match(TokenType.FROM): 3203 return None 3204 3205 return self.expression( 3206 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3207 ) 3208 3209 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3210 return self.expression( 3211 exp.MatchRecognizeMeasure, 3212 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3213 this=self._parse_expression(), 3214 ) 3215 3216 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3217 if not self._match(TokenType.MATCH_RECOGNIZE): 3218 return None 3219 3220 self._match_l_paren() 3221 3222 partition = self._parse_partition_by() 3223 order = self._parse_order() 3224 3225 measures = ( 3226 self._parse_csv(self._parse_match_recognize_measure) 3227 if self._match_text_seq("MEASURES") 3228 else None 3229 ) 3230 3231 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3232 rows = exp.var("ONE ROW PER MATCH") 3233 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3234 text = "ALL ROWS PER MATCH" 3235 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3236 text += " SHOW EMPTY MATCHES" 3237 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3238 text += " OMIT EMPTY MATCHES" 3239 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3240 text += " WITH UNMATCHED ROWS" 3241 rows = exp.var(text) 3242 else: 3243 rows = None 3244 3245 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3246 text = "AFTER MATCH SKIP" 3247 if self._match_text_seq("PAST", "LAST", "ROW"): 3248 text += " PAST LAST ROW" 3249 elif self._match_text_seq("TO", "NEXT", "ROW"): 3250 text += " TO NEXT ROW" 3251 elif self._match_text_seq("TO", "FIRST"): 3252 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3253 elif self._match_text_seq("TO", "LAST"): 3254 text += f" TO LAST {self._advance_any().text}" # type: ignore 3255 after = exp.var(text) 3256 else: 3257 after = None 3258 3259 if self._match_text_seq("PATTERN"): 3260 self._match_l_paren() 3261 3262 if not self._curr: 3263 self.raise_error("Expecting )", self._curr) 3264 3265 paren = 1 3266 start = self._curr 3267 3268 while self._curr and paren > 0: 3269 if self._curr.token_type == TokenType.L_PAREN: 3270 paren += 1 3271 if self._curr.token_type == TokenType.R_PAREN: 3272 paren -= 1 3273 3274 end = self._prev 3275 self._advance() 3276 3277 if paren > 0: 3278 self.raise_error("Expecting )", self._curr) 3279 3280 pattern = exp.var(self._find_sql(start, end)) 3281 else: 3282 pattern = None 3283 3284 define = ( 3285 self._parse_csv(self._parse_name_as_expression) 3286 if self._match_text_seq("DEFINE") 3287 else None 3288 ) 3289 3290 self._match_r_paren() 3291 3292 return self.expression( 3293 exp.MatchRecognize, 3294 partition_by=partition, 3295 order=order, 3296 measures=measures, 3297 rows=rows, 3298 after=after, 
3299 pattern=pattern, 3300 define=define, 3301 alias=self._parse_table_alias(), 3302 ) 3303 3304 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3305 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3306 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3307 cross_apply = False 3308 3309 if cross_apply is not None: 3310 this = self._parse_select(table=True) 3311 view = None 3312 outer = None 3313 elif self._match(TokenType.LATERAL): 3314 this = self._parse_select(table=True) 3315 view = self._match(TokenType.VIEW) 3316 outer = self._match(TokenType.OUTER) 3317 else: 3318 return None 3319 3320 if not this: 3321 this = ( 3322 self._parse_unnest() 3323 or self._parse_function() 3324 or self._parse_id_var(any_token=False) 3325 ) 3326 3327 while self._match(TokenType.DOT): 3328 this = exp.Dot( 3329 this=this, 3330 expression=self._parse_function() or self._parse_id_var(any_token=False), 3331 ) 3332 3333 if view: 3334 table = self._parse_id_var(any_token=False) 3335 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3336 table_alias: t.Optional[exp.TableAlias] = self.expression( 3337 exp.TableAlias, this=table, columns=columns 3338 ) 3339 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3340 # We move the alias from the lateral's child node to the lateral itself 3341 table_alias = this.args["alias"].pop() 3342 else: 3343 table_alias = self._parse_table_alias() 3344 3345 return self.expression( 3346 exp.Lateral, 3347 this=this, 3348 view=view, 3349 outer=outer, 3350 alias=table_alias, 3351 cross_apply=cross_apply, 3352 ) 3353 3354 def _parse_join_parts( 3355 self, 3356 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3357 return ( 3358 self._match_set(self.JOIN_METHODS) and self._prev, 3359 self._match_set(self.JOIN_SIDES) and self._prev, 3360 self._match_set(self.JOIN_KINDS) and self._prev, 3361 ) 3362 3363 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3364 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3365 this = self._parse_column() 3366 if isinstance(this, exp.Column): 3367 return this.this 3368 return this 3369 3370 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3371 3372 def _parse_join( 3373 self, skip_join_token: bool = False, parse_bracket: bool = False 3374 ) -> t.Optional[exp.Join]: 3375 if self._match(TokenType.COMMA): 3376 return self.expression(exp.Join, this=self._parse_table()) 3377 3378 index = self._index 3379 method, side, kind = self._parse_join_parts() 3380 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3381 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3382 3383 if not skip_join_token and not join: 3384 self._retreat(index) 3385 kind = None 3386 method = None 3387 side = None 3388 3389 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3390 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3391 3392 if not skip_join_token and not join and not outer_apply and not cross_apply: 3393 return None 3394 3395 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3396 3397 if method: 3398 kwargs["method"] = method.text 3399 if side: 3400 kwargs["side"] = side.text 3401 if kind: 3402 kwargs["kind"] = kind.text 3403 if hint: 3404 kwargs["hint"] = hint 3405 3406 if self._match(TokenType.MATCH_CONDITION): 3407 kwargs["match_condition"] = 
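# Hedged sketch for _parse_join (default dialect):
import sqlglot
from sqlglot import exp

join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").find(exp.Join)
assert join.side == "LEFT"
assert [i.name for i in join.args["using"]] == ["id"]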
self._parse_wrapped(self._parse_comparison) 3408 3409 if self._match(TokenType.ON): 3410 kwargs["on"] = self._parse_assignment() 3411 elif self._match(TokenType.USING): 3412 kwargs["using"] = self._parse_using_identifiers() 3413 elif ( 3414 not (outer_apply or cross_apply) 3415 and not isinstance(kwargs["this"], exp.Unnest) 3416 and not (kind and kind.token_type == TokenType.CROSS) 3417 ): 3418 index = self._index 3419 joins: t.Optional[list] = list(self._parse_joins()) 3420 3421 if joins and self._match(TokenType.ON): 3422 kwargs["on"] = self._parse_assignment() 3423 elif joins and self._match(TokenType.USING): 3424 kwargs["using"] = self._parse_using_identifiers() 3425 else: 3426 joins = None 3427 self._retreat(index) 3428 3429 kwargs["this"].set("joins", joins if joins else None) 3430 3431 comments = [c for token in (method, side, kind) if token for c in token.comments] 3432 return self.expression(exp.Join, comments=comments, **kwargs) 3433 3434 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3435 this = self._parse_assignment() 3436 3437 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3438 return this 3439 3440 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3441 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3442 3443 return this 3444 3445 def _parse_index_params(self) -> exp.IndexParameters: 3446 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3447 3448 if self._match(TokenType.L_PAREN, advance=False): 3449 columns = self._parse_wrapped_csv(self._parse_with_operator) 3450 else: 3451 columns = None 3452 3453 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3454 partition_by = self._parse_partition_by() 3455 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3456 tablespace = ( 3457 self._parse_var(any_token=True) 3458 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3459 else None 3460 ) 3461 where = self._parse_where() 3462 3463 on = self._parse_field() if self._match(TokenType.ON) else None 3464 3465 return self.expression( 3466 exp.IndexParameters, 3467 using=using, 3468 columns=columns, 3469 include=include, 3470 partition_by=partition_by, 3471 where=where, 3472 with_storage=with_storage, 3473 tablespace=tablespace, 3474 on=on, 3475 ) 3476 3477 def _parse_index( 3478 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3479 ) -> t.Optional[exp.Index]: 3480 if index or anonymous: 3481 unique = None 3482 primary = None 3483 amp = None 3484 3485 self._match(TokenType.ON) 3486 self._match(TokenType.TABLE) # hive 3487 table = self._parse_table_parts(schema=True) 3488 else: 3489 unique = self._match(TokenType.UNIQUE) 3490 primary = self._match_text_seq("PRIMARY") 3491 amp = self._match_text_seq("AMP") 3492 3493 if not self._match(TokenType.INDEX): 3494 return None 3495 3496 index = self._parse_id_var() 3497 table = None 3498 3499 params = self._parse_index_params() 3500 3501 return self.expression( 3502 exp.Index, 3503 this=index, 3504 table=table, 3505 unique=unique, 3506 primary=primary, 3507 amp=amp, 3508 params=params, 3509 ) 3510 3511 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3512 hints: t.List[exp.Expression] = [] 3513 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3514 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3515 hints.append( 3516 self.expression( 3517 exp.WithTableHint, 3518 
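# Hedged sketch for _parse_index via CREATE INDEX (default dialect):
import sqlglot
from sqlglot import exp

create = sqlglot.parse_one("CREATE UNIQUE INDEX idx ON t (a, b)")
assert create.args["unique"] and isinstance(create.this, exp.Index)
params = create.this.args["params"]  # exp.IndexParameters built by _parse_index_params
assert len(params.args["columns"]) == 2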
expressions=self._parse_csv( 3519 lambda: self._parse_function() or self._parse_var(any_token=True) 3520 ), 3521 ) 3522 ) 3523 self._match_r_paren() 3524 else: 3525 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3526 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3527 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3528 3529 self._match_set((TokenType.INDEX, TokenType.KEY)) 3530 if self._match(TokenType.FOR): 3531 hint.set("target", self._advance_any() and self._prev.text.upper()) 3532 3533 hint.set("expressions", self._parse_wrapped_id_vars()) 3534 hints.append(hint) 3535 3536 return hints or None 3537 3538 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3539 return ( 3540 (not schema and self._parse_function(optional_parens=False)) 3541 or self._parse_id_var(any_token=False) 3542 or self._parse_string_as_identifier() 3543 or self._parse_placeholder() 3544 ) 3545 3546 def _parse_table_parts( 3547 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3548 ) -> exp.Table: 3549 catalog = None 3550 db = None 3551 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3552 3553 while self._match(TokenType.DOT): 3554 if catalog: 3555 # This allows nesting the table in arbitrarily many dot expressions if needed 3556 table = self.expression( 3557 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3558 ) 3559 else: 3560 catalog = db 3561 db = table 3562 # "" used for tsql FROM a..b case 3563 table = self._parse_table_part(schema=schema) or "" 3564 3565 if ( 3566 wildcard 3567 and self._is_connected() 3568 and (isinstance(table, exp.Identifier) or not table) 3569 and self._match(TokenType.STAR) 3570 ): 3571 if isinstance(table, exp.Identifier): 3572 table.args["this"] += "*" 3573 else: 3574 table = exp.Identifier(this="*") 3575 3576 # We bubble up comments from the Identifier to the Table 3577 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3578 3579 if is_db_reference: 3580 catalog = db 3581 db = table 3582 table = None 3583 3584 if not table and not is_db_reference: 3585 self.raise_error(f"Expected table name but got {self._curr}") 3586 if not db and is_db_reference: 3587 self.raise_error(f"Expected database name but got {self._curr}") 3588 3589 table = self.expression( 3590 exp.Table, 3591 comments=comments, 3592 this=table, 3593 db=db, 3594 catalog=catalog, 3595 ) 3596 3597 changes = self._parse_changes() 3598 if changes: 3599 table.set("changes", changes) 3600 3601 at_before = self._parse_historical_data() 3602 if at_before: 3603 table.set("when", at_before) 3604 3605 pivots = self._parse_pivots() 3606 if pivots: 3607 table.set("pivots", pivots) 3608 3609 return table 3610 3611 def _parse_table( 3612 self, 3613 schema: bool = False, 3614 joins: bool = False, 3615 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3616 parse_bracket: bool = False, 3617 is_db_reference: bool = False, 3618 parse_partition: bool = False, 3619 ) -> t.Optional[exp.Expression]: 3620 lateral = self._parse_lateral() 3621 if lateral: 3622 return lateral 3623 3624 unnest = self._parse_unnest() 3625 if unnest: 3626 return unnest 3627 3628 values = self._parse_derived_table_values() 3629 if values: 3630 return values 3631 3632 subquery = self._parse_select(table=True) 3633 if subquery: 3634 if not subquery.args.get("pivots"): 3635 subquery.set("pivots", self._parse_pivots()) 3636 return subquery 3637 3638 bracket = parse_bracket and 
self._parse_bracket(None) 3639 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3640 3641 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3642 self._parse_table 3643 ) 3644 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3645 3646 only = self._match(TokenType.ONLY) 3647 3648 this = t.cast( 3649 exp.Expression, 3650 bracket 3651 or rows_from 3652 or self._parse_bracket( 3653 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3654 ), 3655 ) 3656 3657 if only: 3658 this.set("only", only) 3659 3660 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3661 self._match_text_seq("*") 3662 3663 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3664 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3665 this.set("partition", self._parse_partition()) 3666 3667 if schema: 3668 return self._parse_schema(this=this) 3669 3670 version = self._parse_version() 3671 3672 if version: 3673 this.set("version", version) 3674 3675 if self.dialect.ALIAS_POST_TABLESAMPLE: 3676 this.set("sample", self._parse_table_sample()) 3677 3678 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3679 if alias: 3680 this.set("alias", alias) 3681 3682 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3683 return self.expression( 3684 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3685 ) 3686 3687 this.set("hints", self._parse_table_hints()) 3688 3689 if not this.args.get("pivots"): 3690 this.set("pivots", self._parse_pivots()) 3691 3692 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3693 this.set("sample", self._parse_table_sample()) 3694 3695 if joins: 3696 for join in self._parse_joins(): 3697 this.append("joins", join) 3698 3699 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3700 this.set("ordinality", True) 3701 this.set("alias", self._parse_table_alias()) 3702 3703 return this 3704 3705 def _parse_version(self) -> t.Optional[exp.Version]: 3706 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3707 this = "TIMESTAMP" 3708 elif self._match(TokenType.VERSION_SNAPSHOT): 3709 this = "VERSION" 3710 else: 3711 return None 3712 3713 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3714 kind = self._prev.text.upper() 3715 start = self._parse_bitwise() 3716 self._match_texts(("TO", "AND")) 3717 end = self._parse_bitwise() 3718 expression: t.Optional[exp.Expression] = self.expression( 3719 exp.Tuple, expressions=[start, end] 3720 ) 3721 elif self._match_text_seq("CONTAINED", "IN"): 3722 kind = "CONTAINED IN" 3723 expression = self.expression( 3724 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3725 ) 3726 elif self._match(TokenType.ALL): 3727 kind = "ALL" 3728 expression = None 3729 else: 3730 self._match_text_seq("AS", "OF") 3731 kind = "AS OF" 3732 expression = self._parse_type() 3733 3734 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3735 3736 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3737 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3738 index = self._index 3739 historical_data = None 3740 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3741 this = self._prev.text.upper() 3742 kind = ( 3743 self._match(TokenType.L_PAREN) 3744 and self._match_texts(self.HISTORICAL_DATA_KIND) 3745 and self._prev.text.upper() 3746 ) 3747 expression = 
self._match(TokenType.FARROW) and self._parse_bitwise()
3748
3749            if expression:
3750                self._match_r_paren()
3751                historical_data = self.expression(
3752                    exp.HistoricalData, this=this, kind=kind, expression=expression
3753                )
3754        else:
3755            self._retreat(index)
3756
3757        return historical_data
3758
3759    def _parse_changes(self) -> t.Optional[exp.Changes]:
3760        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
3761            return None
3762
3763        information = self._parse_var(any_token=True)
3764        self._match_r_paren()
3765
3766        return self.expression(
3767            exp.Changes,
3768            information=information,
3769            at_before=self._parse_historical_data(),
3770            end=self._parse_historical_data(),
3771        )
3772
3773    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
3774        if not self._match(TokenType.UNNEST):
3775            return None
3776
3777        expressions = self._parse_wrapped_csv(self._parse_equality)
3778        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
3779
3780        alias = self._parse_table_alias() if with_alias else None
3781
3782        if alias:
3783            if self.dialect.UNNEST_COLUMN_ONLY:
3784                if alias.args.get("columns"):
3785                    self.raise_error("Unexpected extra column alias in unnest.")
3786
3787                alias.set("columns", [alias.this])
3788                alias.set("this", None)
3789
3790            columns = alias.args.get("columns") or []
3791            if offset and len(expressions) < len(columns):
3792                offset = columns.pop()
3793
3794        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
3795            self._match(TokenType.ALIAS)
3796            offset = self._parse_id_var(
3797                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
3798            ) or exp.to_identifier("offset")
3799
3800        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
3801
3802    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
3803        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
3804        if not is_derived and not (
3805            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
3806            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
3807        ):
3808            return None
3809
3810        expressions = self._parse_csv(self._parse_value)
3811        alias = self._parse_table_alias()
3812
3813        if is_derived:
3814            self._match_r_paren()
3815
3816        return self.expression(
3817            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
3818        )
3819
3820    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
3821        if not self._match(TokenType.TABLE_SAMPLE) and not (
3822            as_modifier and self._match_text_seq("USING", "SAMPLE")
3823        ):
3824            return None
3825
3826        bucket_numerator = None
3827        bucket_denominator = None
3828        bucket_field = None
3829        percent = None
3830        size = None
3831        seed = None
3832
3833        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
3834        matched_l_paren = self._match(TokenType.L_PAREN)
3835
3836        if self.TABLESAMPLE_CSV:
3837            num = None
3838            expressions = self._parse_csv(self._parse_primary)
3839        else:
3840            expressions = None
3841            num = (
3842                self._parse_factor()
3843                if self._match(TokenType.NUMBER, advance=False)
3844                else self._parse_primary() or self._parse_placeholder()
3845            )
3846
3847        if self._match_text_seq("BUCKET"):
3848            bucket_numerator = self._parse_number()
3849            self._match_text_seq("OUT", "OF")
3850            bucket_denominator = self._parse_number()
3851            self._match(TokenType.ON)
3852            bucket_field = self._parse_field()
3853        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
3854            percent = num
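# (A hedged aside on this branch and the ones below: whether a bare sample
#  size is a percentage or a row count is dialect-specific, controlled by
#  dialect.TABLESAMPLE_SIZE_IS_PERCENT. A minimal sketch through sqlglot's
#  public entry point, which reaches _parse_table_sample:
#
#      import sqlglot
#      from sqlglot import exp
#      ast = sqlglot.parse_one("SELECT * FROM t TABLESAMPLE (10 PERCENT)")
#      sample = ast.find(exp.TableSample)  # args["percent"] holds the 10
#
#  With `10 ROWS` instead, the literal is expected to land in args["size"].)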
3855 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3856 size = num 3857 else: 3858 percent = num 3859 3860 if matched_l_paren: 3861 self._match_r_paren() 3862 3863 if self._match(TokenType.L_PAREN): 3864 method = self._parse_var(upper=True) 3865 seed = self._match(TokenType.COMMA) and self._parse_number() 3866 self._match_r_paren() 3867 elif self._match_texts(("SEED", "REPEATABLE")): 3868 seed = self._parse_wrapped(self._parse_number) 3869 3870 if not method and self.DEFAULT_SAMPLING_METHOD: 3871 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3872 3873 return self.expression( 3874 exp.TableSample, 3875 expressions=expressions, 3876 method=method, 3877 bucket_numerator=bucket_numerator, 3878 bucket_denominator=bucket_denominator, 3879 bucket_field=bucket_field, 3880 percent=percent, 3881 size=size, 3882 seed=seed, 3883 ) 3884 3885 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3886 return list(iter(self._parse_pivot, None)) or None 3887 3888 def _parse_joins(self) -> t.Iterator[exp.Join]: 3889 return iter(self._parse_join, None) 3890 3891 # https://duckdb.org/docs/sql/statements/pivot 3892 def _parse_simplified_pivot(self) -> exp.Pivot: 3893 def _parse_on() -> t.Optional[exp.Expression]: 3894 this = self._parse_bitwise() 3895 return self._parse_in(this) if self._match(TokenType.IN) else this 3896 3897 this = self._parse_table() 3898 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3899 using = self._match(TokenType.USING) and self._parse_csv( 3900 lambda: self._parse_alias(self._parse_function()) 3901 ) 3902 group = self._parse_group() 3903 return self.expression( 3904 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3905 ) 3906 3907 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3908 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3909 this = self._parse_select_or_expression() 3910 3911 self._match(TokenType.ALIAS) 3912 alias = self._parse_bitwise() 3913 if alias: 3914 if isinstance(alias, exp.Column) and not alias.db: 3915 alias = alias.this 3916 return self.expression(exp.PivotAlias, this=this, alias=alias) 3917 3918 return this 3919 3920 value = self._parse_column() 3921 3922 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3923 self.raise_error("Expecting IN (") 3924 3925 if self._match(TokenType.ANY): 3926 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3927 else: 3928 exprs = self._parse_csv(_parse_aliased_expression) 3929 3930 self._match_r_paren() 3931 return self.expression(exp.In, this=value, expressions=exprs) 3932 3933 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3934 index = self._index 3935 include_nulls = None 3936 3937 if self._match(TokenType.PIVOT): 3938 unpivot = False 3939 elif self._match(TokenType.UNPIVOT): 3940 unpivot = True 3941 3942 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3943 if self._match_text_seq("INCLUDE", "NULLS"): 3944 include_nulls = True 3945 elif self._match_text_seq("EXCLUDE", "NULLS"): 3946 include_nulls = False 3947 else: 3948 return None 3949 3950 expressions = [] 3951 3952 if not self._match(TokenType.L_PAREN): 3953 self._retreat(index) 3954 return None 3955 3956 if unpivot: 3957 expressions = self._parse_csv(self._parse_column) 3958 else: 3959 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3960 3961 if not expressions: 3962 self.raise_error("Failed to parse PIVOT's aggregation list") 3963 3964 if 
not self._match(TokenType.FOR): 3965 self.raise_error("Expecting FOR") 3966 3967 field = self._parse_pivot_in() 3968 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3969 self._parse_bitwise 3970 ) 3971 3972 self._match_r_paren() 3973 3974 pivot = self.expression( 3975 exp.Pivot, 3976 expressions=expressions, 3977 field=field, 3978 unpivot=unpivot, 3979 include_nulls=include_nulls, 3980 default_on_null=default_on_null, 3981 ) 3982 3983 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3984 pivot.set("alias", self._parse_table_alias()) 3985 3986 if not unpivot: 3987 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3988 3989 columns: t.List[exp.Expression] = [] 3990 for fld in pivot.args["field"].expressions: 3991 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3992 for name in names: 3993 if self.PREFIXED_PIVOT_COLUMNS: 3994 name = f"{name}_{field_name}" if name else field_name 3995 else: 3996 name = f"{field_name}_{name}" if name else field_name 3997 3998 columns.append(exp.to_identifier(name)) 3999 4000 pivot.set("columns", columns) 4001 4002 return pivot 4003 4004 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4005 return [agg.alias for agg in aggregations] 4006 4007 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4008 if not skip_where_token and not self._match(TokenType.PREWHERE): 4009 return None 4010 4011 return self.expression( 4012 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4013 ) 4014 4015 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4016 if not skip_where_token and not self._match(TokenType.WHERE): 4017 return None 4018 4019 return self.expression( 4020 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4021 ) 4022 4023 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4024 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4025 return None 4026 4027 elements: t.Dict[str, t.Any] = defaultdict(list) 4028 4029 if self._match(TokenType.ALL): 4030 elements["all"] = True 4031 elif self._match(TokenType.DISTINCT): 4032 elements["all"] = False 4033 4034 while True: 4035 index = self._index 4036 4037 elements["expressions"].extend( 4038 self._parse_csv( 4039 lambda: None 4040 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4041 else self._parse_assignment() 4042 ) 4043 ) 4044 4045 before_with_index = self._index 4046 with_prefix = self._match(TokenType.WITH) 4047 4048 if self._match(TokenType.ROLLUP): 4049 elements["rollup"].append( 4050 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4051 ) 4052 elif self._match(TokenType.CUBE): 4053 elements["cube"].append( 4054 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4055 ) 4056 elif self._match(TokenType.GROUPING_SETS): 4057 elements["grouping_sets"].append( 4058 self.expression( 4059 exp.GroupingSets, 4060 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4061 ) 4062 ) 4063 elif self._match_text_seq("TOTALS"): 4064 elements["totals"] = True # type: ignore 4065 4066 if before_with_index <= self._index <= before_with_index + 1: 4067 self._retreat(before_with_index) 4068 break 4069 4070 if index == self._index: 4071 break 4072 4073 return self.expression(exp.Group, **elements) # type: ignore 4074 4075 def _parse_cube_or_rollup(self, kind: 
t.Type[E], with_prefix: bool = False) -> E: 4076 return self.expression( 4077 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4078 ) 4079 4080 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4081 if self._match(TokenType.L_PAREN): 4082 grouping_set = self._parse_csv(self._parse_column) 4083 self._match_r_paren() 4084 return self.expression(exp.Tuple, expressions=grouping_set) 4085 4086 return self._parse_column() 4087 4088 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4089 if not skip_having_token and not self._match(TokenType.HAVING): 4090 return None 4091 return self.expression(exp.Having, this=self._parse_assignment()) 4092 4093 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4094 if not self._match(TokenType.QUALIFY): 4095 return None 4096 return self.expression(exp.Qualify, this=self._parse_assignment()) 4097 4098 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4099 if skip_start_token: 4100 start = None 4101 elif self._match(TokenType.START_WITH): 4102 start = self._parse_assignment() 4103 else: 4104 return None 4105 4106 self._match(TokenType.CONNECT_BY) 4107 nocycle = self._match_text_seq("NOCYCLE") 4108 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4109 exp.Prior, this=self._parse_bitwise() 4110 ) 4111 connect = self._parse_assignment() 4112 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4113 4114 if not start and self._match(TokenType.START_WITH): 4115 start = self._parse_assignment() 4116 4117 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4118 4119 def _parse_name_as_expression(self) -> exp.Alias: 4120 return self.expression( 4121 exp.Alias, 4122 alias=self._parse_id_var(any_token=True), 4123 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4124 ) 4125 4126 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4127 if self._match_text_seq("INTERPOLATE"): 4128 return self._parse_wrapped_csv(self._parse_name_as_expression) 4129 return None 4130 4131 def _parse_order( 4132 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4133 ) -> t.Optional[exp.Expression]: 4134 siblings = None 4135 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4136 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4137 return this 4138 4139 siblings = True 4140 4141 return self.expression( 4142 exp.Order, 4143 this=this, 4144 expressions=self._parse_csv(self._parse_ordered), 4145 siblings=siblings, 4146 ) 4147 4148 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4149 if not self._match(token): 4150 return None 4151 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4152 4153 def _parse_ordered( 4154 self, parse_method: t.Optional[t.Callable] = None 4155 ) -> t.Optional[exp.Ordered]: 4156 this = parse_method() if parse_method else self._parse_assignment() 4157 if not this: 4158 return None 4159 4160 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4161 this = exp.var("ALL") 4162 4163 asc = self._match(TokenType.ASC) 4164 desc = self._match(TokenType.DESC) or (asc and False) 4165 4166 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4167 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4168 4169 nulls_first = is_nulls_first or False 4170 explicitly_null_ordered = is_nulls_first or is_nulls_last 4171 4172 if ( 4173 not explicitly_null_ordered 4174 and ( 4175 
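# (the parenthesized test that follows infers the implicit NULL position from
#  the dialect's NULL_ORDERING: under "nulls_are_small", NULLs sort first in
#  ascending order, and the check flips when DESC was matched)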
(not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4176 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4177 ) 4178 and self.dialect.NULL_ORDERING != "nulls_are_last" 4179 ): 4180 nulls_first = True 4181 4182 if self._match_text_seq("WITH", "FILL"): 4183 with_fill = self.expression( 4184 exp.WithFill, 4185 **{ # type: ignore 4186 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4187 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4188 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4189 "interpolate": self._parse_interpolate(), 4190 }, 4191 ) 4192 else: 4193 with_fill = None 4194 4195 return self.expression( 4196 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4197 ) 4198 4199 def _parse_limit( 4200 self, 4201 this: t.Optional[exp.Expression] = None, 4202 top: bool = False, 4203 skip_limit_token: bool = False, 4204 ) -> t.Optional[exp.Expression]: 4205 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4206 comments = self._prev_comments 4207 if top: 4208 limit_paren = self._match(TokenType.L_PAREN) 4209 expression = self._parse_term() if limit_paren else self._parse_number() 4210 4211 if limit_paren: 4212 self._match_r_paren() 4213 else: 4214 expression = self._parse_term() 4215 4216 if self._match(TokenType.COMMA): 4217 offset = expression 4218 expression = self._parse_term() 4219 else: 4220 offset = None 4221 4222 limit_exp = self.expression( 4223 exp.Limit, 4224 this=this, 4225 expression=expression, 4226 offset=offset, 4227 comments=comments, 4228 expressions=self._parse_limit_by(), 4229 ) 4230 4231 return limit_exp 4232 4233 if self._match(TokenType.FETCH): 4234 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4235 direction = self._prev.text.upper() if direction else "FIRST" 4236 4237 count = self._parse_field(tokens=self.FETCH_TOKENS) 4238 percent = self._match(TokenType.PERCENT) 4239 4240 self._match_set((TokenType.ROW, TokenType.ROWS)) 4241 4242 only = self._match_text_seq("ONLY") 4243 with_ties = self._match_text_seq("WITH", "TIES") 4244 4245 if only and with_ties: 4246 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4247 4248 return self.expression( 4249 exp.Fetch, 4250 direction=direction, 4251 count=count, 4252 percent=percent, 4253 with_ties=with_ties, 4254 ) 4255 4256 return this 4257 4258 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4259 if not self._match(TokenType.OFFSET): 4260 return this 4261 4262 count = self._parse_term() 4263 self._match_set((TokenType.ROW, TokenType.ROWS)) 4264 4265 return self.expression( 4266 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4267 ) 4268 4269 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4270 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4271 4272 def _parse_locks(self) -> t.List[exp.Lock]: 4273 locks = [] 4274 while True: 4275 if self._match_text_seq("FOR", "UPDATE"): 4276 update = True 4277 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4278 "LOCK", "IN", "SHARE", "MODE" 4279 ): 4280 update = False 4281 else: 4282 break 4283 4284 expressions = None 4285 if self._match_text_seq("OF"): 4286 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4287 4288 wait: t.Optional[bool | exp.Expression] = None 4289 if self._match_text_seq("NOWAIT"): 4290 wait = True 4291 elif self._match_text_seq("WAIT"): 4292 wait = 
self._parse_primary() 4293 elif self._match_text_seq("SKIP", "LOCKED"): 4294 wait = False 4295 4296 locks.append( 4297 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4298 ) 4299 4300 return locks 4301 4302 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4303 while this and self._match_set(self.SET_OPERATIONS): 4304 token_type = self._prev.token_type 4305 4306 if token_type == TokenType.UNION: 4307 operation: t.Type[exp.SetOperation] = exp.Union 4308 elif token_type == TokenType.EXCEPT: 4309 operation = exp.Except 4310 else: 4311 operation = exp.Intersect 4312 4313 comments = self._prev.comments 4314 4315 if self._match(TokenType.DISTINCT): 4316 distinct: t.Optional[bool] = True 4317 elif self._match(TokenType.ALL): 4318 distinct = False 4319 else: 4320 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4321 if distinct is None: 4322 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4323 4324 by_name = self._match_text_seq("BY", "NAME") 4325 expression = self._parse_select(nested=True, parse_set_operation=False) 4326 4327 this = self.expression( 4328 operation, 4329 comments=comments, 4330 this=this, 4331 distinct=distinct, 4332 by_name=by_name, 4333 expression=expression, 4334 ) 4335 4336 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4337 expression = this.expression 4338 4339 if expression: 4340 for arg in self.SET_OP_MODIFIERS: 4341 expr = expression.args.get(arg) 4342 if expr: 4343 this.set(arg, expr.pop()) 4344 4345 return this 4346 4347 def _parse_expression(self) -> t.Optional[exp.Expression]: 4348 return self._parse_alias(self._parse_assignment()) 4349 4350 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4351 this = self._parse_disjunction() 4352 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4353 # This allows us to parse <non-identifier token> := <expr> 4354 this = exp.column( 4355 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4356 ) 4357 4358 while self._match_set(self.ASSIGNMENT): 4359 if isinstance(this, exp.Column) and len(this.parts) == 1: 4360 this = this.this 4361 4362 this = self.expression( 4363 self.ASSIGNMENT[self._prev.token_type], 4364 this=this, 4365 comments=self._prev_comments, 4366 expression=self._parse_assignment(), 4367 ) 4368 4369 return this 4370 4371 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4372 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4373 4374 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4375 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4376 4377 def _parse_equality(self) -> t.Optional[exp.Expression]: 4378 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4379 4380 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4381 return self._parse_tokens(self._parse_range, self.COMPARISON) 4382 4383 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4384 this = this or self._parse_bitwise() 4385 negate = self._match(TokenType.NOT) 4386 4387 if self._match_set(self.RANGE_PARSERS): 4388 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4389 if not expression: 4390 return this 4391 4392 this = expression 4393 elif self._match(TokenType.ISNULL): 4394 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4395 4396 # Postgres supports ISNULL and NOTNULL for conditions. 
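# Both shorthands normalize onto exp.Is, with NOTNULL additionally wrapped in
# exp.Not. A hedged sketch: sqlglot.parse_one("SELECT x NOTNULL") is expected
# to yield exp.Not(this=exp.Is(this=x, expression=exp.Null())).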
4397 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4398 if self._match(TokenType.NOTNULL): 4399 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4400 this = self.expression(exp.Not, this=this) 4401 4402 if negate: 4403 this = self._negate_range(this) 4404 4405 if self._match(TokenType.IS): 4406 this = self._parse_is(this) 4407 4408 return this 4409 4410 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4411 if not this: 4412 return this 4413 4414 return self.expression(exp.Not, this=this) 4415 4416 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4417 index = self._index - 1 4418 negate = self._match(TokenType.NOT) 4419 4420 if self._match_text_seq("DISTINCT", "FROM"): 4421 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4422 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4423 4424 if self._match(TokenType.JSON): 4425 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4426 4427 if self._match_text_seq("WITH"): 4428 _with = True 4429 elif self._match_text_seq("WITHOUT"): 4430 _with = False 4431 else: 4432 _with = None 4433 4434 unique = self._match(TokenType.UNIQUE) 4435 self._match_text_seq("KEYS") 4436 expression: t.Optional[exp.Expression] = self.expression( 4437 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4438 ) 4439 else: 4440 expression = self._parse_primary() or self._parse_null() 4441 if not expression: 4442 self._retreat(index) 4443 return None 4444 4445 this = self.expression(exp.Is, this=this, expression=expression) 4446 return self.expression(exp.Not, this=this) if negate else this 4447 4448 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4449 unnest = self._parse_unnest(with_alias=False) 4450 if unnest: 4451 this = self.expression(exp.In, this=this, unnest=unnest) 4452 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4453 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4454 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4455 4456 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4457 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4458 else: 4459 this = self.expression(exp.In, this=this, expressions=expressions) 4460 4461 if matched_l_paren: 4462 self._match_r_paren(this) 4463 elif not self._match(TokenType.R_BRACKET, expression=this): 4464 self.raise_error("Expecting ]") 4465 else: 4466 this = self.expression(exp.In, this=this, field=self._parse_field()) 4467 4468 return this 4469 4470 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4471 low = self._parse_bitwise() 4472 self._match(TokenType.AND) 4473 high = self._parse_bitwise() 4474 return self.expression(exp.Between, this=this, low=low, high=high) 4475 4476 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4477 if not self._match(TokenType.ESCAPE): 4478 return this 4479 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4480 4481 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4482 index = self._index 4483 4484 if not self._match(TokenType.INTERVAL) and match_interval: 4485 return None 4486 4487 if self._match(TokenType.STRING, advance=False): 4488 this = self._parse_primary() 4489 else: 4490 this = self._parse_term() 4491 4492 if not this 
or ( 4493 isinstance(this, exp.Column) 4494 and not this.table 4495 and not this.this.quoted 4496 and this.name.upper() == "IS" 4497 ): 4498 self._retreat(index) 4499 return None 4500 4501 unit = self._parse_function() or ( 4502 not self._match(TokenType.ALIAS, advance=False) 4503 and self._parse_var(any_token=True, upper=True) 4504 ) 4505 4506 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4507 # each INTERVAL expression into this canonical form so it's easy to transpile 4508 if this and this.is_number: 4509 this = exp.Literal.string(this.to_py()) 4510 elif this and this.is_string: 4511 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4512 if len(parts) == 1: 4513 if unit: 4514 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4515 self._retreat(self._index - 1) 4516 4517 this = exp.Literal.string(parts[0][0]) 4518 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4519 4520 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4521 unit = self.expression( 4522 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4523 ) 4524 4525 interval = self.expression(exp.Interval, this=this, unit=unit) 4526 4527 index = self._index 4528 self._match(TokenType.PLUS) 4529 4530 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4531 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4532 return self.expression( 4533 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4534 ) 4535 4536 self._retreat(index) 4537 return interval 4538 4539 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4540 this = self._parse_term() 4541 4542 while True: 4543 if self._match_set(self.BITWISE): 4544 this = self.expression( 4545 self.BITWISE[self._prev.token_type], 4546 this=this, 4547 expression=self._parse_term(), 4548 ) 4549 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4550 this = self.expression( 4551 exp.DPipe, 4552 this=this, 4553 expression=self._parse_term(), 4554 safe=not self.dialect.STRICT_STRING_CONCAT, 4555 ) 4556 elif self._match(TokenType.DQMARK): 4557 this = self.expression( 4558 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4559 ) 4560 elif self._match_pair(TokenType.LT, TokenType.LT): 4561 this = self.expression( 4562 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4563 ) 4564 elif self._match_pair(TokenType.GT, TokenType.GT): 4565 this = self.expression( 4566 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4567 ) 4568 else: 4569 break 4570 4571 return this 4572 4573 def _parse_term(self) -> t.Optional[exp.Expression]: 4574 this = self._parse_factor() 4575 4576 while self._match_set(self.TERM): 4577 klass = self.TERM[self._prev.token_type] 4578 comments = self._prev_comments 4579 expression = self._parse_factor() 4580 4581 this = self.expression(klass, this=this, comments=comments, expression=expression) 4582 4583 if isinstance(this, exp.Collate): 4584 expr = this.expression 4585 4586 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4587 # fallback to Identifier / Var 4588 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4589 ident = expr.this 4590 if isinstance(ident, exp.Identifier): 4591 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4592 4593 return this 4594 4595 def _parse_factor(self) -> t.Optional[exp.Expression]: 4596 parse_method = self._parse_exponent if 
self.EXPONENT else self._parse_unary
4597        this = parse_method()
4598
4599        while self._match_set(self.FACTOR):
4600            klass = self.FACTOR[self._prev.token_type]
4601            comments = self._prev_comments
4602            expression = parse_method()
4603
4604            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
4605                self._retreat(self._index - 1)
4606                return this
4607
4608            this = self.expression(klass, this=this, comments=comments, expression=expression)
4609
4610            if isinstance(this, exp.Div):
4611                this.args["typed"] = self.dialect.TYPED_DIVISION
4612                this.args["safe"] = self.dialect.SAFE_DIVISION
4613
4614        return this
4615
4616    def _parse_exponent(self) -> t.Optional[exp.Expression]:
4617        return self._parse_tokens(self._parse_unary, self.EXPONENT)
4618
4619    def _parse_unary(self) -> t.Optional[exp.Expression]:
4620        if self._match_set(self.UNARY_PARSERS):
4621            return self.UNARY_PARSERS[self._prev.token_type](self)
4622        return self._parse_at_time_zone(self._parse_type())
4623
4624    def _parse_type(
4625        self, parse_interval: bool = True, fallback_to_identifier: bool = False
4626    ) -> t.Optional[exp.Expression]:
4627        interval = parse_interval and self._parse_interval()
4628        if interval:
4629            return interval
4630
4631        index = self._index
4632        data_type = self._parse_types(check_func=True, allow_identifiers=False)
4633
4634        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4635        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4636        if isinstance(data_type, exp.Cast):
4637            # This constructor can contain ops directly after it, for instance struct unnesting:
4638            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
4639            return self._parse_column_ops(data_type)
4640
4641        if data_type:
4642            index2 = self._index
4643            this = self._parse_primary()
4644
4645            if isinstance(this, exp.Literal):
4646                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
4647                if parser:
4648                    return parser(self, this, data_type)
4649
4650                return self.expression(exp.Cast, this=this, to=data_type)
4651
4652            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
4653            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
4654            #
4655            # If the index difference here is greater than 1, that means the parser itself must have
4656            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
4657            #
4658            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
4659            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
4660            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
4661            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
4662            #
4663            # In these cases, we don't really want to return the converted type, but instead retreat
4664            # and try to parse a Column or Identifier in the section below.
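# A hedged sketch of the Literal path above (parse_one ultimately reaches
# _parse_type while parsing scalar expressions):
#
#     import sqlglot
#     from sqlglot import exp
#     # A type keyword followed by a string literal parses as a cast here:
#     cast = sqlglot.parse_one("SELECT DATE '2020-01-01'").find(exp.Cast)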
4665 if data_type.expressions and index2 - index > 1: 4666 self._retreat(index2) 4667 return self._parse_column_ops(data_type) 4668 4669 self._retreat(index) 4670 4671 if fallback_to_identifier: 4672 return self._parse_id_var() 4673 4674 this = self._parse_column() 4675 return this and self._parse_column_ops(this) 4676 4677 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4678 this = self._parse_type() 4679 if not this: 4680 return None 4681 4682 if isinstance(this, exp.Column) and not this.table: 4683 this = exp.var(this.name.upper()) 4684 4685 return self.expression( 4686 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4687 ) 4688 4689 def _parse_types( 4690 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4691 ) -> t.Optional[exp.Expression]: 4692 index = self._index 4693 4694 this: t.Optional[exp.Expression] = None 4695 prefix = self._match_text_seq("SYSUDTLIB", ".") 4696 4697 if not self._match_set(self.TYPE_TOKENS): 4698 identifier = allow_identifiers and self._parse_id_var( 4699 any_token=False, tokens=(TokenType.VAR,) 4700 ) 4701 if isinstance(identifier, exp.Identifier): 4702 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4703 4704 if len(tokens) != 1: 4705 self.raise_error("Unexpected identifier", self._prev) 4706 4707 if tokens[0].token_type in self.TYPE_TOKENS: 4708 self._prev = tokens[0] 4709 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4710 type_name = identifier.name 4711 4712 while self._match(TokenType.DOT): 4713 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4714 4715 this = exp.DataType.build(type_name, udt=True) 4716 else: 4717 self._retreat(self._index - 1) 4718 return None 4719 else: 4720 return None 4721 4722 type_token = self._prev.token_type 4723 4724 if type_token == TokenType.PSEUDO_TYPE: 4725 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4726 4727 if type_token == TokenType.OBJECT_IDENTIFIER: 4728 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4729 4730 # https://materialize.com/docs/sql/types/map/ 4731 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4732 key_type = self._parse_types( 4733 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4734 ) 4735 if not self._match(TokenType.FARROW): 4736 self._retreat(index) 4737 return None 4738 4739 value_type = self._parse_types( 4740 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4741 ) 4742 if not self._match(TokenType.R_BRACKET): 4743 self._retreat(index) 4744 return None 4745 4746 return exp.DataType( 4747 this=exp.DataType.Type.MAP, 4748 expressions=[key_type, value_type], 4749 nested=True, 4750 prefix=prefix, 4751 ) 4752 4753 nested = type_token in self.NESTED_TYPE_TOKENS 4754 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4755 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4756 expressions = None 4757 maybe_func = False 4758 4759 if self._match(TokenType.L_PAREN): 4760 if is_struct: 4761 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4762 elif nested: 4763 expressions = self._parse_csv( 4764 lambda: self._parse_types( 4765 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4766 ) 4767 ) 4768 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4769 this = expressions[0] 4770 this.set("nullable", True) 4771 self._match_r_paren() 4772 return this 4773 elif type_token in self.ENUM_TYPE_TOKENS: 4774 
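# (e.g. ClickHouse's Enum8('a' = 1, 'b' = 2): parsing each member as an
#  equality keeps both the member name and its explicit value)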
expressions = self._parse_csv(self._parse_equality) 4775 elif is_aggregate: 4776 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4777 any_token=False, tokens=(TokenType.VAR,) 4778 ) 4779 if not func_or_ident or not self._match(TokenType.COMMA): 4780 return None 4781 expressions = self._parse_csv( 4782 lambda: self._parse_types( 4783 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4784 ) 4785 ) 4786 expressions.insert(0, func_or_ident) 4787 else: 4788 expressions = self._parse_csv(self._parse_type_size) 4789 4790 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4791 if type_token == TokenType.VECTOR and len(expressions) == 2: 4792 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4793 4794 if not expressions or not self._match(TokenType.R_PAREN): 4795 self._retreat(index) 4796 return None 4797 4798 maybe_func = True 4799 4800 values: t.Optional[t.List[exp.Expression]] = None 4801 4802 if nested and self._match(TokenType.LT): 4803 if is_struct: 4804 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4805 else: 4806 expressions = self._parse_csv( 4807 lambda: self._parse_types( 4808 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4809 ) 4810 ) 4811 4812 if not self._match(TokenType.GT): 4813 self.raise_error("Expecting >") 4814 4815 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4816 values = self._parse_csv(self._parse_assignment) 4817 if not values and is_struct: 4818 values = None 4819 self._retreat(self._index - 1) 4820 else: 4821 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4822 4823 if type_token in self.TIMESTAMPS: 4824 if self._match_text_seq("WITH", "TIME", "ZONE"): 4825 maybe_func = False 4826 tz_type = ( 4827 exp.DataType.Type.TIMETZ 4828 if type_token in self.TIMES 4829 else exp.DataType.Type.TIMESTAMPTZ 4830 ) 4831 this = exp.DataType(this=tz_type, expressions=expressions) 4832 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4833 maybe_func = False 4834 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4835 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4836 maybe_func = False 4837 elif type_token == TokenType.INTERVAL: 4838 unit = self._parse_var(upper=True) 4839 if unit: 4840 if self._match_text_seq("TO"): 4841 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4842 4843 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4844 else: 4845 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4846 4847 if maybe_func and check_func: 4848 index2 = self._index 4849 peek = self._parse_string() 4850 4851 if not peek: 4852 self._retreat(index) 4853 return None 4854 4855 self._retreat(index2) 4856 4857 if not this: 4858 if self._match_text_seq("UNSIGNED"): 4859 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4860 if not unsigned_type_token: 4861 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4862 4863 type_token = unsigned_type_token or type_token 4864 4865 this = exp.DataType( 4866 this=exp.DataType.Type[type_token.value], 4867 expressions=expressions, 4868 nested=nested, 4869 prefix=prefix, 4870 ) 4871 4872 # Empty arrays/structs are allowed 4873 if values is not None: 4874 cls = exp.Struct if is_struct else exp.Array 4875 this = exp.cast(cls(expressions=values), this, copy=False) 4876 4877 elif expressions: 4878 this.set("expressions", 
expressions) 4879 4880 # https://materialize.com/docs/sql/types/list/#type-name 4881 while self._match(TokenType.LIST): 4882 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4883 4884 index = self._index 4885 4886 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4887 matched_array = self._match(TokenType.ARRAY) 4888 4889 while self._curr: 4890 datatype_token = self._prev.token_type 4891 matched_l_bracket = self._match(TokenType.L_BRACKET) 4892 if not matched_l_bracket and not matched_array: 4893 break 4894 4895 matched_array = False 4896 values = self._parse_csv(self._parse_assignment) or None 4897 if ( 4898 values 4899 and not schema 4900 and ( 4901 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4902 ) 4903 ): 4904 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4905 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4906 self._retreat(index) 4907 break 4908 4909 this = exp.DataType( 4910 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4911 ) 4912 self._match(TokenType.R_BRACKET) 4913 4914 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4915 converter = self.TYPE_CONVERTERS.get(this.this) 4916 if converter: 4917 this = converter(t.cast(exp.DataType, this)) 4918 4919 return this 4920 4921 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4922 index = self._index 4923 4924 if ( 4925 self._curr 4926 and self._next 4927 and self._curr.token_type in self.TYPE_TOKENS 4928 and self._next.token_type in self.TYPE_TOKENS 4929 ): 4930 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4931 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 4932 this = self._parse_id_var() 4933 else: 4934 this = ( 4935 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4936 or self._parse_id_var() 4937 ) 4938 4939 self._match(TokenType.COLON) 4940 4941 if ( 4942 type_required 4943 and not isinstance(this, exp.DataType) 4944 and not self._match_set(self.TYPE_TOKENS, advance=False) 4945 ): 4946 self._retreat(index) 4947 return self._parse_types() 4948 4949 return self._parse_column_def(this) 4950 4951 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4952 if not self._match_text_seq("AT", "TIME", "ZONE"): 4953 return this 4954 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4955 4956 def _parse_column(self) -> t.Optional[exp.Expression]: 4957 this = self._parse_column_reference() 4958 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4959 4960 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4961 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4962 4963 return column 4964 4965 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4966 this = self._parse_field() 4967 if ( 4968 not this 4969 and self._match(TokenType.VALUES, advance=False) 4970 and self.VALUES_FOLLOWED_BY_PAREN 4971 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4972 ): 4973 this = self._parse_id_var() 4974 4975 if isinstance(this, exp.Identifier): 4976 # We bubble up comments from the Identifier to the Column 4977 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4978 4979 return this 4980 4981 def _parse_colon_as_variant_extract( 4982 self, this: t.Optional[exp.Expression] 4983 ) -> t.Optional[exp.Expression]: 4984 casts = [] 4985 json_path = [] 4986 escape = None 4987 4988 while self._match(TokenType.COLON): 4989 start_index = self._index 4990 4991 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4992 path = self._parse_column_ops( 4993 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4994 ) 4995 4996 # The cast :: operator has a lower precedence than the extraction operator :, so 4997 # we rearrange the AST appropriately to avoid casting the JSON path 4998 while isinstance(path, exp.Cast): 4999 casts.append(path.to) 5000 path = path.this 5001 5002 if casts: 5003 dcolon_offset = next( 5004 i 5005 for i, t in enumerate(self._tokens[start_index:]) 5006 if t.token_type == TokenType.DCOLON 5007 ) 5008 end_token = self._tokens[start_index + dcolon_offset - 1] 5009 else: 5010 end_token = self._prev 5011 5012 if path: 5013 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5014 # it'll roundtrip to a string literal in GET_PATH 5015 if isinstance(path, exp.Identifier) and path.quoted: 5016 escape = True 5017 5018 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5019 5020 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5021 # Databricks transforms it back to the colon/dot notation 5022 if json_path: 5023 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5024 5025 if json_path_expr: 5026 json_path_expr.set("escape", escape) 5027 5028 this = self.expression( 5029 exp.JSONExtract, 5030 this=this, 5031 expression=json_path_expr, 5032 variant_extract=True, 5033 ) 5034 5035 while casts: 5036 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5037 5038 return this 5039 5040 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5041 return self._parse_types() 5042 5043 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5044 this = self._parse_bracket(this) 5045 5046 while self._match_set(self.COLUMN_OPERATORS): 5047 op_token = self._prev.token_type 5048 op = self.COLUMN_OPERATORS.get(op_token) 5049 5050 if op_token == TokenType.DCOLON: 5051 field = self._parse_dcolon() 5052 if not field: 5053 self.raise_error("Expected type") 5054 elif op and self._curr: 5055 field = self._parse_column_reference() or self._parse_bracket() 5056 else: 5057 field = self._parse_field(any_token=True, anonymous_func=True) 5058 5059 if isinstance(field, exp.Func) and this: 5060 # bigquery allows function calls like x.y.count(...) 5061 # SAFE.SUBSTR(...) 5062 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5063 this = exp.replace_tree( 5064 this, 5065 lambda n: ( 5066 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5067 if n.table 5068 else n.this 5069 ) 5070 if isinstance(n, exp.Column) 5071 else n, 5072 ) 5073 5074 if op: 5075 this = op(self, this, field) 5076 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5077 this = self.expression( 5078 exp.Column, 5079 this=field, 5080 table=this.this, 5081 db=this.args.get("table"), 5082 catalog=this.args.get("db"), 5083 ) 5084 else: 5085 this = self.expression(exp.Dot, this=this, expression=field) 5086 5087 this = self._parse_bracket(this) 5088 5089 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5090 5091 def _parse_primary(self) -> t.Optional[exp.Expression]: 5092 if self._match_set(self.PRIMARY_PARSERS): 5093 token_type = self._prev.token_type 5094 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5095 5096 if token_type == TokenType.STRING: 5097 expressions = [primary] 5098 while self._match(TokenType.STRING): 5099 expressions.append(exp.Literal.string(self._prev.text)) 5100 5101 if len(expressions) > 1: 5102 return self.expression(exp.Concat, expressions=expressions) 5103 5104 return primary 5105 5106 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5107 return exp.Literal.number(f"0.{self._prev.text}") 5108 5109 if self._match(TokenType.L_PAREN): 5110 comments = self._prev_comments 5111 query = self._parse_select() 5112 5113 if query: 5114 expressions = [query] 5115 else: 5116 expressions = self._parse_expressions() 5117 5118 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5119 5120 if not this and self._match(TokenType.R_PAREN, advance=False): 5121 this = self.expression(exp.Tuple) 
5122 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5123 this = self._parse_subquery(this=this, parse_alias=False) 5124 elif isinstance(this, exp.Subquery): 5125 this = self._parse_subquery( 5126 this=self._parse_set_operations(this), parse_alias=False 5127 ) 5128 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5129 this = self.expression(exp.Tuple, expressions=expressions) 5130 else: 5131 this = self.expression(exp.Paren, this=this) 5132 5133 if this: 5134 this.add_comments(comments) 5135 5136 self._match_r_paren(expression=this) 5137 return this 5138 5139 return None 5140 5141 def _parse_field( 5142 self, 5143 any_token: bool = False, 5144 tokens: t.Optional[t.Collection[TokenType]] = None, 5145 anonymous_func: bool = False, 5146 ) -> t.Optional[exp.Expression]: 5147 if anonymous_func: 5148 field = ( 5149 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5150 or self._parse_primary() 5151 ) 5152 else: 5153 field = self._parse_primary() or self._parse_function( 5154 anonymous=anonymous_func, any_token=any_token 5155 ) 5156 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5157 5158 def _parse_function( 5159 self, 5160 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5161 anonymous: bool = False, 5162 optional_parens: bool = True, 5163 any_token: bool = False, 5164 ) -> t.Optional[exp.Expression]: 5165 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5166 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5167 fn_syntax = False 5168 if ( 5169 self._match(TokenType.L_BRACE, advance=False) 5170 and self._next 5171 and self._next.text.upper() == "FN" 5172 ): 5173 self._advance(2) 5174 fn_syntax = True 5175 5176 func = self._parse_function_call( 5177 functions=functions, 5178 anonymous=anonymous, 5179 optional_parens=optional_parens, 5180 any_token=any_token, 5181 ) 5182 5183 if fn_syntax: 5184 self._match(TokenType.R_BRACE) 5185 5186 return func 5187 5188 def _parse_function_call( 5189 self, 5190 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5191 anonymous: bool = False, 5192 optional_parens: bool = True, 5193 any_token: bool = False, 5194 ) -> t.Optional[exp.Expression]: 5195 if not self._curr: 5196 return None 5197 5198 comments = self._curr.comments 5199 token_type = self._curr.token_type 5200 this = self._curr.text 5201 upper = this.upper() 5202 5203 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5204 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5205 self._advance() 5206 return self._parse_window(parser(self)) 5207 5208 if not self._next or self._next.token_type != TokenType.L_PAREN: 5209 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5210 self._advance() 5211 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5212 5213 return None 5214 5215 if any_token: 5216 if token_type in self.RESERVED_TOKENS: 5217 return None 5218 elif token_type not in self.FUNC_TOKENS: 5219 return None 5220 5221 self._advance(2) 5222 5223 parser = self.FUNCTION_PARSERS.get(upper) 5224 if parser and not anonymous: 5225 this = parser(self) 5226 else: 5227 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5228 5229 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5230 this = self.expression(subquery_predicate, this=self._parse_select()) 5231 self._match_r_paren() 5232 return this 5233 5234 if functions is None: 5235 functions = self.FUNCTIONS 5236 5237 function = 
functions.get(upper) 5238 5239 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5240 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5241 5242 if alias: 5243 args = self._kv_to_prop_eq(args) 5244 5245 if function and not anonymous: 5246 if "dialect" in function.__code__.co_varnames: 5247 func = function(args, dialect=self.dialect) 5248 else: 5249 func = function(args) 5250 5251 func = self.validate_expression(func, args) 5252 if not self.dialect.NORMALIZE_FUNCTIONS: 5253 func.meta["name"] = this 5254 5255 this = func 5256 else: 5257 if token_type == TokenType.IDENTIFIER: 5258 this = exp.Identifier(this=this, quoted=True) 5259 this = self.expression(exp.Anonymous, this=this, expressions=args) 5260 5261 if isinstance(this, exp.Expression): 5262 this.add_comments(comments) 5263 5264 self._match_r_paren(this) 5265 return self._parse_window(this) 5266 5267 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5268 return expression 5269 5270 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5271 transformed = [] 5272 5273 for index, e in enumerate(expressions): 5274 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5275 if isinstance(e, exp.Alias): 5276 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5277 5278 if not isinstance(e, exp.PropertyEQ): 5279 e = self.expression( 5280 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5281 ) 5282 5283 if isinstance(e.this, exp.Column): 5284 e.this.replace(e.this.this) 5285 else: 5286 e = self._to_prop_eq(e, index) 5287 5288 transformed.append(e) 5289 5290 return transformed 5291 5292 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5293 return self._parse_column_def(self._parse_id_var()) 5294 5295 def _parse_user_defined_function( 5296 self, kind: t.Optional[TokenType] = None 5297 ) -> t.Optional[exp.Expression]: 5298 this = self._parse_id_var() 5299 5300 while self._match(TokenType.DOT): 5301 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5302 5303 if not self._match(TokenType.L_PAREN): 5304 return this 5305 5306 expressions = self._parse_csv(self._parse_function_parameter) 5307 self._match_r_paren() 5308 return self.expression( 5309 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5310 ) 5311 5312 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5313 literal = self._parse_primary() 5314 if literal: 5315 return self.expression(exp.Introducer, this=token.text, expression=literal) 5316 5317 return self.expression(exp.Identifier, this=token.text) 5318 5319 def _parse_session_parameter(self) -> exp.SessionParameter: 5320 kind = None 5321 this = self._parse_id_var() or self._parse_primary() 5322 5323 if this and self._match(TokenType.DOT): 5324 kind = this.name 5325 this = self._parse_var() or self._parse_primary() 5326 5327 return self.expression(exp.SessionParameter, this=this, kind=kind) 5328 5329 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5330 return self._parse_id_var() 5331 5332 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5333 index = self._index 5334 5335 if self._match(TokenType.L_PAREN): 5336 expressions = t.cast( 5337 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5338 ) 5339 5340 if not self._match(TokenType.R_PAREN): 5341 self._retreat(index) 5342 else: 5343 expressions = [self._parse_lambda_arg()] 5344 5345 if 
self._match_set(self.LAMBDAS): 5346 return self.LAMBDAS[self._prev.token_type](self, expressions) 5347 5348 self._retreat(index) 5349 5350 this: t.Optional[exp.Expression] 5351 5352 if self._match(TokenType.DISTINCT): 5353 this = self.expression( 5354 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5355 ) 5356 else: 5357 this = self._parse_select_or_expression(alias=alias) 5358 5359 return self._parse_limit( 5360 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5361 ) 5362 5363 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5364 index = self._index 5365 if not self._match(TokenType.L_PAREN): 5366 return this 5367 5368 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5369 # expr can be of both types 5370 if self._match_set(self.SELECT_START_TOKENS): 5371 self._retreat(index) 5372 return this 5373 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5374 self._match_r_paren() 5375 return self.expression(exp.Schema, this=this, expressions=args) 5376 5377 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5378 return self._parse_column_def(self._parse_field(any_token=True)) 5379 5380 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5381 # column defs are not really columns, they're identifiers 5382 if isinstance(this, exp.Column): 5383 this = this.this 5384 5385 kind = self._parse_types(schema=True) 5386 5387 if self._match_text_seq("FOR", "ORDINALITY"): 5388 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5389 5390 constraints: t.List[exp.Expression] = [] 5391 5392 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5393 ("ALIAS", "MATERIALIZED") 5394 ): 5395 persisted = self._prev.text.upper() == "MATERIALIZED" 5396 constraint_kind = exp.ComputedColumnConstraint( 5397 this=self._parse_assignment(), 5398 persisted=persisted or self._match_text_seq("PERSISTED"), 5399 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5400 ) 5401 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5402 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5403 self._match(TokenType.ALIAS) 5404 constraints.append( 5405 self.expression( 5406 exp.ColumnConstraint, 5407 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5408 ) 5409 ) 5410 5411 while True: 5412 constraint = self._parse_column_constraint() 5413 if not constraint: 5414 break 5415 constraints.append(constraint) 5416 5417 if not kind and not constraints: 5418 return this 5419 5420 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5421 5422 def _parse_auto_increment( 5423 self, 5424 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5425 start = None 5426 increment = None 5427 5428 if self._match(TokenType.L_PAREN, advance=False): 5429 args = self._parse_wrapped_csv(self._parse_bitwise) 5430 start = seq_get(args, 0) 5431 increment = seq_get(args, 1) 5432 elif self._match_text_seq("START"): 5433 start = self._parse_bitwise() 5434 self._match_text_seq("INCREMENT") 5435 increment = self._parse_bitwise() 5436 5437 if start and increment: 5438 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5439 5440 return exp.AutoIncrementColumnConstraint() 5441 5442 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5443 if not 
self._match_text_seq("REFRESH"): 5444 self._retreat(self._index - 1) 5445 return None 5446 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5447 5448 def _parse_compress(self) -> exp.CompressColumnConstraint: 5449 if self._match(TokenType.L_PAREN, advance=False): 5450 return self.expression( 5451 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5452 ) 5453 5454 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5455 5456 def _parse_generated_as_identity( 5457 self, 5458 ) -> ( 5459 exp.GeneratedAsIdentityColumnConstraint 5460 | exp.ComputedColumnConstraint 5461 | exp.GeneratedAsRowColumnConstraint 5462 ): 5463 if self._match_text_seq("BY", "DEFAULT"): 5464 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5465 this = self.expression( 5466 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5467 ) 5468 else: 5469 self._match_text_seq("ALWAYS") 5470 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5471 5472 self._match(TokenType.ALIAS) 5473 5474 if self._match_text_seq("ROW"): 5475 start = self._match_text_seq("START") 5476 if not start: 5477 self._match(TokenType.END) 5478 hidden = self._match_text_seq("HIDDEN") 5479 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5480 5481 identity = self._match_text_seq("IDENTITY") 5482 5483 if self._match(TokenType.L_PAREN): 5484 if self._match(TokenType.START_WITH): 5485 this.set("start", self._parse_bitwise()) 5486 if self._match_text_seq("INCREMENT", "BY"): 5487 this.set("increment", self._parse_bitwise()) 5488 if self._match_text_seq("MINVALUE"): 5489 this.set("minvalue", self._parse_bitwise()) 5490 if self._match_text_seq("MAXVALUE"): 5491 this.set("maxvalue", self._parse_bitwise()) 5492 5493 if self._match_text_seq("CYCLE"): 5494 this.set("cycle", True) 5495 elif self._match_text_seq("NO", "CYCLE"): 5496 this.set("cycle", False) 5497 5498 if not identity: 5499 this.set("expression", self._parse_range()) 5500 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5501 args = self._parse_csv(self._parse_bitwise) 5502 this.set("start", seq_get(args, 0)) 5503 this.set("increment", seq_get(args, 1)) 5504 5505 self._match_r_paren() 5506 5507 return this 5508 5509 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5510 self._match_text_seq("LENGTH") 5511 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5512 5513 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5514 if self._match_text_seq("NULL"): 5515 return self.expression(exp.NotNullColumnConstraint) 5516 if self._match_text_seq("CASESPECIFIC"): 5517 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5518 if self._match_text_seq("FOR", "REPLICATION"): 5519 return self.expression(exp.NotForReplicationColumnConstraint) 5520 5521 # Unconsume the `NOT` token 5522 self._retreat(self._index - 1) 5523 return None 5524 5525 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5526 if self._match(TokenType.CONSTRAINT): 5527 this = self._parse_id_var() 5528 else: 5529 this = None 5530 5531 if self._match_texts(self.CONSTRAINT_PARSERS): 5532 return self.expression( 5533 exp.ColumnConstraint, 5534 this=this, 5535 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5536 ) 5537 5538 return this 5539 5540 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5541 if not self._match(TokenType.CONSTRAINT): 5542 
return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5543 5544 return self.expression( 5545 exp.Constraint, 5546 this=self._parse_id_var(), 5547 expressions=self._parse_unnamed_constraints(), 5548 ) 5549 5550 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5551 constraints = [] 5552 while True: 5553 constraint = self._parse_unnamed_constraint() or self._parse_function() 5554 if not constraint: 5555 break 5556 constraints.append(constraint) 5557 5558 return constraints 5559 5560 def _parse_unnamed_constraint( 5561 self, constraints: t.Optional[t.Collection[str]] = None 5562 ) -> t.Optional[exp.Expression]: 5563 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5564 constraints or self.CONSTRAINT_PARSERS 5565 ): 5566 return None 5567 5568 constraint = self._prev.text.upper() 5569 if constraint not in self.CONSTRAINT_PARSERS: 5570 self.raise_error(f"No parser found for schema constraint {constraint}.") 5571 5572 return self.CONSTRAINT_PARSERS[constraint](self) 5573 5574 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5575 return self._parse_id_var(any_token=False) 5576 5577 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5578 self._match_text_seq("KEY") 5579 return self.expression( 5580 exp.UniqueColumnConstraint, 5581 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5582 this=self._parse_schema(self._parse_unique_key()), 5583 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5584 on_conflict=self._parse_on_conflict(), 5585 ) 5586 5587 def _parse_key_constraint_options(self) -> t.List[str]: 5588 options = [] 5589 while True: 5590 if not self._curr: 5591 break 5592 5593 if self._match(TokenType.ON): 5594 action = None 5595 on = self._advance_any() and self._prev.text 5596 5597 if self._match_text_seq("NO", "ACTION"): 5598 action = "NO ACTION" 5599 elif self._match_text_seq("CASCADE"): 5600 action = "CASCADE" 5601 elif self._match_text_seq("RESTRICT"): 5602 action = "RESTRICT" 5603 elif self._match_pair(TokenType.SET, TokenType.NULL): 5604 action = "SET NULL" 5605 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5606 action = "SET DEFAULT" 5607 else: 5608 self.raise_error("Invalid key constraint") 5609 5610 options.append(f"ON {on} {action}") 5611 else: 5612 var = self._parse_var_from_options( 5613 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5614 ) 5615 if not var: 5616 break 5617 options.append(var.name) 5618 5619 return options 5620 5621 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5622 if match and not self._match(TokenType.REFERENCES): 5623 return None 5624 5625 expressions = None 5626 this = self._parse_table(schema=True) 5627 options = self._parse_key_constraint_options() 5628 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5629 5630 def _parse_foreign_key(self) -> exp.ForeignKey: 5631 expressions = self._parse_wrapped_id_vars() 5632 reference = self._parse_references() 5633 options = {} 5634 5635 while self._match(TokenType.ON): 5636 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5637 self.raise_error("Expected DELETE or UPDATE") 5638 5639 kind = self._prev.text.lower() 5640 5641 if self._match_text_seq("NO", "ACTION"): 5642 action = "NO ACTION" 5643 elif self._match(TokenType.SET): 5644 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5645 action = "SET " + self._prev.text.upper() 5646 else: 5647 self._advance() 5648 action = 
self._prev.text.upper() 5649 5650 options[kind] = action 5651 5652 return self.expression( 5653 exp.ForeignKey, 5654 expressions=expressions, 5655 reference=reference, 5656 **options, # type: ignore 5657 ) 5658 5659 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5660 return self._parse_field() 5661 5662 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5663 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5664 self._retreat(self._index - 1) 5665 return None 5666 5667 id_vars = self._parse_wrapped_id_vars() 5668 return self.expression( 5669 exp.PeriodForSystemTimeConstraint, 5670 this=seq_get(id_vars, 0), 5671 expression=seq_get(id_vars, 1), 5672 ) 5673 5674 def _parse_primary_key( 5675 self, wrapped_optional: bool = False, in_props: bool = False 5676 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5677 desc = ( 5678 self._match_set((TokenType.ASC, TokenType.DESC)) 5679 and self._prev.token_type == TokenType.DESC 5680 ) 5681 5682 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5683 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5684 5685 expressions = self._parse_wrapped_csv( 5686 self._parse_primary_key_part, optional=wrapped_optional 5687 ) 5688 options = self._parse_key_constraint_options() 5689 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5690 5691 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5692 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5693 5694 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5695 """ 5696 Parses a datetime literal in ODBC format. We parse the literal into the corresponding 5697 type; for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` expression, exactly the 5698 same as `DATE('yyyy-mm-dd')` is.
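The same `ODBC_DATETIME_LITERALS` lookup covers the other ODBC escape prefixes, so
e.g. `{t'hh:mm:ss'}` and `{ts'yyyy-mm-dd hh:mm:ss'}` build the corresponding `Time` and
`Timestamp` expressions (assuming the dialect keeps the standard `d`/`t`/`ts` keys in
that mapping).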
5699 5700 Reference: 5701 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5702 """ 5703 self._match(TokenType.VAR) 5704 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5705 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5706 if not self._match(TokenType.R_BRACE): 5707 self.raise_error("Expected }") 5708 return expression 5709 5710 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5711 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5712 return this 5713 5714 bracket_kind = self._prev.token_type 5715 if ( 5716 bracket_kind == TokenType.L_BRACE 5717 and self._curr 5718 and self._curr.token_type == TokenType.VAR 5719 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5720 ): 5721 return self._parse_odbc_datetime_literal() 5722 5723 expressions = self._parse_csv( 5724 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5725 ) 5726 5727 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5728 self.raise_error("Expected ]") 5729 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5730 self.raise_error("Expected }") 5731 5732 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5733 if bracket_kind == TokenType.L_BRACE: 5734 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5735 elif not this: 5736 this = build_array_constructor( 5737 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5738 ) 5739 else: 5740 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5741 if constructor_type: 5742 return build_array_constructor( 5743 constructor_type, 5744 args=expressions, 5745 bracket_kind=bracket_kind, 5746 dialect=self.dialect, 5747 ) 5748 5749 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5750 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5751 5752 self._add_comments(this) 5753 return self._parse_bracket(this) 5754 5755 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5756 if self._match(TokenType.COLON): 5757 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5758 return this 5759 5760 def _parse_case(self) -> t.Optional[exp.Expression]: 5761 ifs = [] 5762 default = None 5763 5764 comments = self._prev_comments 5765 expression = self._parse_assignment() 5766 5767 while self._match(TokenType.WHEN): 5768 this = self._parse_assignment() 5769 self._match(TokenType.THEN) 5770 then = self._parse_assignment() 5771 ifs.append(self.expression(exp.If, this=this, true=then)) 5772 5773 if self._match(TokenType.ELSE): 5774 default = self._parse_assignment() 5775 5776 if not self._match(TokenType.END): 5777 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5778 default = exp.column("interval") 5779 else: 5780 self.raise_error("Expected END after CASE", self._prev) 5781 5782 return self.expression( 5783 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5784 ) 5785 5786 def _parse_if(self) -> t.Optional[exp.Expression]: 5787 if self._match(TokenType.L_PAREN): 5788 args = self._parse_csv(self._parse_assignment) 5789 this = self.validate_expression(exp.If.from_arg_list(args), args) 5790 self._match_r_paren() 5791 else: 5792 index = self._index - 1 5793 5794 if self.NO_PAREN_IF_COMMANDS and index == 0: 5795 
return self._parse_as_command(self._prev) 5796 5797 condition = self._parse_assignment() 5798 5799 if not condition: 5800 self._retreat(index) 5801 return None 5802 5803 self._match(TokenType.THEN) 5804 true = self._parse_assignment() 5805 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5806 self._match(TokenType.END) 5807 this = self.expression(exp.If, this=condition, true=true, false=false) 5808 5809 return this 5810 5811 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5812 if not self._match_text_seq("VALUE", "FOR"): 5813 self._retreat(self._index - 1) 5814 return None 5815 5816 return self.expression( 5817 exp.NextValueFor, 5818 this=self._parse_column(), 5819 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5820 ) 5821 5822 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5823 this = self._parse_function() or self._parse_var_or_string(upper=True) 5824 5825 if self._match(TokenType.FROM): 5826 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5827 5828 if not self._match(TokenType.COMMA): 5829 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5830 5831 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5832 5833 def _parse_gap_fill(self) -> exp.GapFill: 5834 self._match(TokenType.TABLE) 5835 this = self._parse_table() 5836 5837 self._match(TokenType.COMMA) 5838 args = [this, *self._parse_csv(self._parse_lambda)] 5839 5840 gap_fill = exp.GapFill.from_arg_list(args) 5841 return self.validate_expression(gap_fill, args) 5842 5843 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5844 this = self._parse_assignment() 5845 5846 if not self._match(TokenType.ALIAS): 5847 if self._match(TokenType.COMMA): 5848 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5849 5850 self.raise_error("Expected AS after CAST") 5851 5852 fmt = None 5853 to = self._parse_types() 5854 5855 if self._match(TokenType.FORMAT): 5856 fmt_string = self._parse_string() 5857 fmt = self._parse_at_time_zone(fmt_string) 5858 5859 if not to: 5860 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5861 if to.this in exp.DataType.TEMPORAL_TYPES: 5862 this = self.expression( 5863 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5864 this=this, 5865 format=exp.Literal.string( 5866 format_time( 5867 fmt_string.this if fmt_string else "", 5868 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5869 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5870 ) 5871 ), 5872 safe=safe, 5873 ) 5874 5875 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5876 this.set("zone", fmt.args["zone"]) 5877 return this 5878 elif not to: 5879 self.raise_error("Expected TYPE after CAST") 5880 elif isinstance(to, exp.Identifier): 5881 to = exp.DataType.build(to.name, udt=True) 5882 elif to.this == exp.DataType.Type.CHAR: 5883 if self._match(TokenType.CHARACTER_SET): 5884 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5885 5886 return self.expression( 5887 exp.Cast if strict else exp.TryCast, 5888 this=this, 5889 to=to, 5890 format=fmt, 5891 safe=safe, 5892 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5893 ) 5894 5895 def _parse_string_agg(self) -> exp.Expression: 5896 if self._match(TokenType.DISTINCT): 5897 args: t.List[t.Optional[exp.Expression]] = [ 5898 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
5899 ] 5900 if self._match(TokenType.COMMA): 5901 args.extend(self._parse_csv(self._parse_assignment)) 5902 else: 5903 args = self._parse_csv(self._parse_assignment) # type: ignore 5904 5905 index = self._index 5906 if not self._match(TokenType.R_PAREN) and args: 5907 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5908 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5909 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5910 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5911 5912 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5913 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5914 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5915 if not self._match_text_seq("WITHIN", "GROUP"): 5916 self._retreat(index) 5917 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5918 5919 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5920 order = self._parse_order(this=seq_get(args, 0)) 5921 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5922 5923 def _parse_convert( 5924 self, strict: bool, safe: t.Optional[bool] = None 5925 ) -> t.Optional[exp.Expression]: 5926 this = self._parse_bitwise() 5927 5928 if self._match(TokenType.USING): 5929 to: t.Optional[exp.Expression] = self.expression( 5930 exp.CharacterSet, this=self._parse_var() 5931 ) 5932 elif self._match(TokenType.COMMA): 5933 to = self._parse_types() 5934 else: 5935 to = None 5936 5937 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5938 5939 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5940 """ 5941 There are generally two variants of the DECODE function: 5942 5943 - DECODE(bin, charset) 5944 - DECODE(expression, search, result [, search, result] ... [, default]) 5945 5946 The second variant will always be parsed into a CASE expression. Note that NULL 5947 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5948 instead of relying on pattern matching. 
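For example, `DECODE(x, 1, 'one', NULL, 'none', 'other')` is rewritten to
`CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END`.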
5949 """ 5950 args = self._parse_csv(self._parse_assignment) 5951 5952 if len(args) < 3: 5953 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5954 5955 expression, *expressions = args 5956 if not expression: 5957 return None 5958 5959 ifs = [] 5960 for search, result in zip(expressions[::2], expressions[1::2]): 5961 if not search or not result: 5962 return None 5963 5964 if isinstance(search, exp.Literal): 5965 ifs.append( 5966 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5967 ) 5968 elif isinstance(search, exp.Null): 5969 ifs.append( 5970 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5971 ) 5972 else: 5973 cond = exp.or_( 5974 exp.EQ(this=expression.copy(), expression=search), 5975 exp.and_( 5976 exp.Is(this=expression.copy(), expression=exp.Null()), 5977 exp.Is(this=search.copy(), expression=exp.Null()), 5978 copy=False, 5979 ), 5980 copy=False, 5981 ) 5982 ifs.append(exp.If(this=cond, true=result)) 5983 5984 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5985 5986 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5987 self._match_text_seq("KEY") 5988 key = self._parse_column() 5989 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5990 self._match_text_seq("VALUE") 5991 value = self._parse_bitwise() 5992 5993 if not key and not value: 5994 return None 5995 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5996 5997 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5998 if not this or not self._match_text_seq("FORMAT", "JSON"): 5999 return this 6000 6001 return self.expression(exp.FormatJson, this=this) 6002 6003 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6004 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6005 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6006 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6007 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6008 else: 6009 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6010 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6011 6012 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6013 6014 if not empty and not error and not null: 6015 return None 6016 6017 return self.expression( 6018 exp.OnCondition, 6019 empty=empty, 6020 error=error, 6021 null=null, 6022 ) 6023 6024 def _parse_on_handling( 6025 self, on: str, *values: str 6026 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6027 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6028 for value in values: 6029 if self._match_text_seq(value, "ON", on): 6030 return f"{value} ON {on}" 6031 6032 index = self._index 6033 if self._match(TokenType.DEFAULT): 6034 default_value = self._parse_bitwise() 6035 if self._match_text_seq("ON", on): 6036 return default_value 6037 6038 self._retreat(index) 6039 6040 return None 6041 6042 @t.overload 6043 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6044 6045 @t.overload 6046 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
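# A sketch of the argument shapes the implementation below accepts (illustrative SQL;
# exact support varies by dialect):
#
#   JSON_OBJECT(*)
#   JSON_OBJECT('a': 1, 'b': 2 ABSENT ON NULL)
#   JSON_OBJECT(KEY 'a' VALUE 1 WITH UNIQUE KEYS RETURNING VARCHAR(100))
#
# null_handling is captured as a plain string (e.g. "ABSENT ON NULL") via
# _parse_on_handling, and unique_keys becomes True/False for WITH/WITHOUT UNIQUE [KEYS].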
6047 6048 def _parse_json_object(self, agg=False): 6049 star = self._parse_star() 6050 expressions = ( 6051 [star] 6052 if star 6053 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6054 ) 6055 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6056 6057 unique_keys = None 6058 if self._match_text_seq("WITH", "UNIQUE"): 6059 unique_keys = True 6060 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6061 unique_keys = False 6062 6063 self._match_text_seq("KEYS") 6064 6065 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6066 self._parse_type() 6067 ) 6068 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6069 6070 return self.expression( 6071 exp.JSONObjectAgg if agg else exp.JSONObject, 6072 expressions=expressions, 6073 null_handling=null_handling, 6074 unique_keys=unique_keys, 6075 return_type=return_type, 6076 encoding=encoding, 6077 ) 6078 6079 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6080 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6081 if not self._match_text_seq("NESTED"): 6082 this = self._parse_id_var() 6083 kind = self._parse_types(allow_identifiers=False) 6084 nested = None 6085 else: 6086 this = None 6087 kind = None 6088 nested = True 6089 6090 path = self._match_text_seq("PATH") and self._parse_string() 6091 nested_schema = nested and self._parse_json_schema() 6092 6093 return self.expression( 6094 exp.JSONColumnDef, 6095 this=this, 6096 kind=kind, 6097 path=path, 6098 nested_schema=nested_schema, 6099 ) 6100 6101 def _parse_json_schema(self) -> exp.JSONSchema: 6102 self._match_text_seq("COLUMNS") 6103 return self.expression( 6104 exp.JSONSchema, 6105 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6106 ) 6107 6108 def _parse_json_table(self) -> exp.JSONTable: 6109 this = self._parse_format_json(self._parse_bitwise()) 6110 path = self._match(TokenType.COMMA) and self._parse_string() 6111 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6112 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6113 schema = self._parse_json_schema() 6114 6115 return exp.JSONTable( 6116 this=this, 6117 schema=schema, 6118 path=path, 6119 error_handling=error_handling, 6120 empty_handling=empty_handling, 6121 ) 6122 6123 def _parse_match_against(self) -> exp.MatchAgainst: 6124 expressions = self._parse_csv(self._parse_column) 6125 6126 self._match_text_seq(")", "AGAINST", "(") 6127 6128 this = self._parse_string() 6129 6130 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6131 modifier = "IN NATURAL LANGUAGE MODE" 6132 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6133 modifier = f"{modifier} WITH QUERY EXPANSION" 6134 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6135 modifier = "IN BOOLEAN MODE" 6136 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6137 modifier = "WITH QUERY EXPANSION" 6138 else: 6139 modifier = None 6140 6141 return self.expression( 6142 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6143 ) 6144 6145 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6146 def _parse_open_json(self) -> exp.OpenJSON: 6147 this = self._parse_bitwise() 6148 path = self._match(TokenType.COMMA) and self._parse_string() 6149 6150 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6151 this = self._parse_field(any_token=True) 6152 kind = self._parse_types() 6153 path = 
self._parse_string() 6154 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6155 6156 return self.expression( 6157 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6158 ) 6159 6160 expressions = None 6161 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6162 self._match_l_paren() 6163 expressions = self._parse_csv(_parse_open_json_column_def) 6164 6165 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6166 6167 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6168 args = self._parse_csv(self._parse_bitwise) 6169 6170 if self._match(TokenType.IN): 6171 return self.expression( 6172 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6173 ) 6174 6175 if haystack_first: 6176 haystack = seq_get(args, 0) 6177 needle = seq_get(args, 1) 6178 else: 6179 needle = seq_get(args, 0) 6180 haystack = seq_get(args, 1) 6181 6182 return self.expression( 6183 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6184 ) 6185 6186 def _parse_predict(self) -> exp.Predict: 6187 self._match_text_seq("MODEL") 6188 this = self._parse_table() 6189 6190 self._match(TokenType.COMMA) 6191 self._match_text_seq("TABLE") 6192 6193 return self.expression( 6194 exp.Predict, 6195 this=this, 6196 expression=self._parse_table(), 6197 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6198 ) 6199 6200 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6201 args = self._parse_csv(self._parse_table) 6202 return exp.JoinHint(this=func_name.upper(), expressions=args) 6203 6204 def _parse_substring(self) -> exp.Substring: 6205 # Postgres supports the form: substring(string [from int] [for int]) 6206 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6207 6208 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6209 6210 if self._match(TokenType.FROM): 6211 args.append(self._parse_bitwise()) 6212 if self._match(TokenType.FOR): 6213 if len(args) == 1: 6214 args.append(exp.Literal.number(1)) 6215 args.append(self._parse_bitwise()) 6216 6217 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6218 6219 def _parse_trim(self) -> exp.Trim: 6220 # https://www.w3resource.com/sql/character-functions/trim.php 6221 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6222 6223 position = None 6224 collation = None 6225 expression = None 6226 6227 if self._match_texts(self.TRIM_TYPES): 6228 position = self._prev.text.upper() 6229 6230 this = self._parse_bitwise() 6231 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6232 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6233 expression = self._parse_bitwise() 6234 6235 if invert_order: 6236 this, expression = expression, this 6237 6238 if self._match(TokenType.COLLATE): 6239 collation = self._parse_bitwise() 6240 6241 return self.expression( 6242 exp.Trim, this=this, position=position, expression=expression, collation=collation 6243 ) 6244 6245 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6246 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6247 6248 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6249 return self._parse_window(self._parse_id_var(), alias=True) 6250 6251 def _parse_respect_or_ignore_nulls( 6252 self, this: t.Optional[exp.Expression] 6253 ) -> t.Optional[exp.Expression]: 6254 if self._match_text_seq("IGNORE", "NULLS"): 
6255 return self.expression(exp.IgnoreNulls, this=this) 6256 if self._match_text_seq("RESPECT", "NULLS"): 6257 return self.expression(exp.RespectNulls, this=this) 6258 return this 6259 6260 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6261 if self._match(TokenType.HAVING): 6262 self._match_texts(("MAX", "MIN")) 6263 max = self._prev.text.upper() != "MIN" 6264 return self.expression( 6265 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6266 ) 6267 6268 return this 6269 6270 def _parse_window( 6271 self, this: t.Optional[exp.Expression], alias: bool = False 6272 ) -> t.Optional[exp.Expression]: 6273 func = this 6274 comments = func.comments if isinstance(func, exp.Expression) else None 6275 6276 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6277 self._match(TokenType.WHERE) 6278 this = self.expression( 6279 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6280 ) 6281 self._match_r_paren() 6282 6283 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6284 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6285 if self._match_text_seq("WITHIN", "GROUP"): 6286 order = self._parse_wrapped(self._parse_order) 6287 this = self.expression(exp.WithinGroup, this=this, expression=order) 6288 6289 # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] before OVER; 6290 # some dialects choose to implement it and some do not. 6291 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6292 6293 # There is some code above in _parse_lambda that handles 6294 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6295 6296 # The code below handles 6297 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6298 6299 # Oracle allows both formats 6300 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6301 # and Snowflake chose to do the same for familiarity 6302 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6303 if isinstance(this, exp.AggFunc): 6304 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6305 6306 if ignore_respect and ignore_respect is not this: 6307 ignore_respect.replace(ignore_respect.this) 6308 this = self.expression(ignore_respect.__class__, this=this) 6309 6310 this = self._parse_respect_or_ignore_nulls(this) 6311
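# A sketch of the normalization above (assuming FIRST_VALUE parses to an exp.AggFunc
# subclass): both of
#
#   SELECT FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y) FROM t
#   SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t
#
# produce Window(this=IgnoreNulls(this=FirstValue(...)), ...): the AggFunc branch pulls an
# inner IgnoreNulls/RespectNulls node out of the function's arguments, and
# _parse_respect_or_ignore_nulls wraps the function when the modifier trails the parentheses.
6312 # bigquery: SELECT ... FROM ... WINDOW x AS (PARTITION BY ...)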
6313 if alias: 6314 over = None 6315 self._match(TokenType.ALIAS) 6316 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 6317 return this 6318 else: 6319 over = self._prev.text.upper() 6320 6321 if comments and isinstance(func, exp.Expression): 6322 func.pop_comments() 6323 6324 if not self._match(TokenType.L_PAREN): 6325 return self.expression( 6326 exp.Window, 6327 comments=comments, 6328 this=this, 6329 alias=self._parse_id_var(False), 6330 over=over, 6331 ) 6332 6333 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 6334 6335 first = self._match(TokenType.FIRST) 6336 if self._match_text_seq("LAST"): 6337 first = False 6338 6339 partition, order = self._parse_partition_and_order() 6340 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 6341 6342 if kind: 6343 self._match(TokenType.BETWEEN) 6344 start = self._parse_window_spec() 6345 self._match(TokenType.AND) 6346 end = self._parse_window_spec() 6347 6348 spec = self.expression( 6349 exp.WindowSpec, 6350 kind=kind, 6351 start=start["value"], 6352 start_side=start["side"], 6353 end=end["value"], 6354 end_side=end["side"], 6355 ) 6356 else: 6357 spec = None 6358 6359 self._match_r_paren() 6360 6361 window = self.expression( 6362 exp.Window, 6363 comments=comments, 6364 this=this, 6365 partition_by=partition, 6366 order=order, 6367 spec=spec, 6368 alias=window_alias, 6369 over=over, 6370 first=first, 6371 ) 6372 6373 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 6374 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 6375 return self._parse_window(window, alias=alias) 6376 6377 return window 6378 6379 def _parse_partition_and_order( 6380 self, 6381 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 6382 return self._parse_partition_by(), self._parse_order() 6383 6384 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 6385 self._match(TokenType.BETWEEN) 6386 6387 return { 6388 "value": ( 6389 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 6390 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 6391 or self._parse_bitwise() 6392 ), 6393 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 6394 } 6395 6396 def _parse_alias( 6397 self, this: t.Optional[exp.Expression], explicit: bool = False 6398 ) -> t.Optional[exp.Expression]: 6399 any_token = self._match(TokenType.ALIAS) 6400 comments = self._prev_comments or [] 6401 6402 if explicit and not any_token: 6403 return this 6404 6405 if self._match(TokenType.L_PAREN): 6406 aliases = self.expression( 6407 exp.Aliases, 6408 comments=comments, 6409 this=this, 6410 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 6411 ) 6412 self._match_r_paren(aliases) 6413 return aliases 6414 6415 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 6416 self.STRING_ALIASES and self._parse_string_as_identifier() 6417 ) 6418 6419 if alias: 6420 comments.extend(alias.pop_comments()) 6421 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 6422 column = this.this 6423 6424 # Moves the comment next to the alias in `expr /* comment */ AS alias` 6425 if not this.comments and column and column.comments: 6426 this.comments = column.pop_comments() 6427 6428 return this 6429 6430 def _parse_id_var( 6431 self, 6432 any_token: bool = True, 6433 tokens: t.Optional[t.Collection[TokenType]] = None, 6434 ) -> t.Optional[exp.Expression]: 6435 expression = self._parse_identifier() 6436 if 
not expression and ( 6437 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 6438 ): 6439 quoted = self._prev.token_type == TokenType.STRING 6440 expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted) 6441 6442 return expression 6443 6444 def _parse_string(self) -> t.Optional[exp.Expression]: 6445 if self._match_set(self.STRING_PARSERS): 6446 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 6447 return self._parse_placeholder() 6448 6449 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 6450 return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 6451 6452 def _parse_number(self) -> t.Optional[exp.Expression]: 6453 if self._match_set(self.NUMERIC_PARSERS): 6454 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 6455 return self._parse_placeholder() 6456 6457 def _parse_identifier(self) -> t.Optional[exp.Expression]: 6458 if self._match(TokenType.IDENTIFIER): 6459 return self.expression(exp.Identifier, this=self._prev.text, quoted=True) 6460 return self._parse_placeholder() 6461 6462 def _parse_var( 6463 self, 6464 any_token: bool = False, 6465 tokens: t.Optional[t.Collection[TokenType]] = None, 6466 upper: bool = False, 6467 ) -> t.Optional[exp.Expression]: 6468 if ( 6469 (any_token and self._advance_any()) 6470 or self._match(TokenType.VAR) 6471 or (self._match_set(tokens) if tokens else False) 6472 ): 6473 return self.expression( 6474 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 6475 ) 6476 return self._parse_placeholder() 6477 6478 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 6479 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 6480 self._advance() 6481 return self._prev 6482 return None 6483 6484 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 6485 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 6486 6487 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 6488 return self._parse_primary() or self._parse_var(any_token=True) 6489 6490 def _parse_null(self) -> t.Optional[exp.Expression]: 6491 if self._match_set(self.NULL_TOKENS): 6492 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 6493 return self._parse_placeholder() 6494 6495 def _parse_boolean(self) -> t.Optional[exp.Expression]: 6496 if self._match(TokenType.TRUE): 6497 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 6498 if self._match(TokenType.FALSE): 6499 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 6500 return self._parse_placeholder() 6501 6502 def _parse_star(self) -> t.Optional[exp.Expression]: 6503 if self._match(TokenType.STAR): 6504 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 6505 return self._parse_placeholder() 6506 6507 def _parse_parameter(self) -> exp.Parameter: 6508 this = self._parse_identifier() or self._parse_primary_or_var() 6509 return self.expression(exp.Parameter, this=this) 6510 6511 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 6512 if self._match_set(self.PLACEHOLDER_PARSERS): 6513 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 6514 if placeholder: 6515 return placeholder 6516 self._advance(-1) 6517 return None 6518 6519 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 6520 if not self._match_texts(keywords): 6521 return None 6522 if self._match(TokenType.L_PAREN, 
advance=False): 6523 return self._parse_wrapped_csv(self._parse_expression) 6524 6525 expression = self._parse_expression() 6526 return [expression] if expression else None 6527 6528 def _parse_csv( 6529 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 6530 ) -> t.List[exp.Expression]: 6531 parse_result = parse_method() 6532 items = [parse_result] if parse_result is not None else [] 6533 6534 while self._match(sep): 6535 self._add_comments(parse_result) 6536 parse_result = parse_method() 6537 if parse_result is not None: 6538 items.append(parse_result) 6539 6540 return items 6541 6542 def _parse_tokens( 6543 self, parse_method: t.Callable, expressions: t.Dict 6544 ) -> t.Optional[exp.Expression]: 6545 this = parse_method() 6546 6547 while self._match_set(expressions): 6548 this = self.expression( 6549 expressions[self._prev.token_type], 6550 this=this, 6551 comments=self._prev_comments, 6552 expression=parse_method(), 6553 ) 6554 6555 return this 6556 6557 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 6558 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 6559 6560 def _parse_wrapped_csv( 6561 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 6562 ) -> t.List[exp.Expression]: 6563 return self._parse_wrapped( 6564 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 6565 ) 6566 6567 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 6568 wrapped = self._match(TokenType.L_PAREN) 6569 if not wrapped and not optional: 6570 self.raise_error("Expecting (") 6571 parse_result = parse_method() 6572 if wrapped: 6573 self._match_r_paren() 6574 return parse_result 6575 6576 def _parse_expressions(self) -> t.List[exp.Expression]: 6577 return self._parse_csv(self._parse_expression) 6578 6579 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 6580 return self._parse_select() or self._parse_set_operations( 6581 self._parse_expression() if alias else self._parse_assignment() 6582 ) 6583 6584 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 6585 return self._parse_query_modifiers( 6586 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 6587 ) 6588 6589 def _parse_transaction(self) -> exp.Transaction | exp.Command: 6590 this = None 6591 if self._match_texts(self.TRANSACTION_KIND): 6592 this = self._prev.text 6593 6594 self._match_texts(("TRANSACTION", "WORK")) 6595 6596 modes = [] 6597 while True: 6598 mode = [] 6599 while self._match(TokenType.VAR): 6600 mode.append(self._prev.text) 6601 6602 if mode: 6603 modes.append(" ".join(mode)) 6604 if not self._match(TokenType.COMMA): 6605 break 6606 6607 return self.expression(exp.Transaction, this=this, modes=modes) 6608 6609 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 6610 chain = None 6611 savepoint = None 6612 is_rollback = self._prev.token_type == TokenType.ROLLBACK 6613 6614 self._match_texts(("TRANSACTION", "WORK")) 6615 6616 if self._match_text_seq("TO"): 6617 self._match_text_seq("SAVEPOINT") 6618 savepoint = self._parse_id_var() 6619 6620 if self._match(TokenType.AND): 6621 chain = not self._match_text_seq("NO") 6622 self._match_text_seq("CHAIN") 6623 6624 if is_rollback: 6625 return self.expression(exp.Rollback, savepoint=savepoint) 6626 6627 return self.expression(exp.Commit, chain=chain) 6628 6629 def _parse_refresh(self) -> exp.Refresh: 6630 self._match(TokenType.TABLE) 6631 return 
self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 6632 6633 def _parse_add_column(self) -> t.Optional[exp.Expression]: 6634 if not self._match_text_seq("ADD"): 6635 return None 6636 6637 self._match(TokenType.COLUMN) 6638 exists_column = self._parse_exists(not_=True) 6639 expression = self._parse_field_def() 6640 6641 if expression: 6642 expression.set("exists", exists_column) 6643 6644 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 6645 if self._match_texts(("FIRST", "AFTER")): 6646 position = self._prev.text 6647 column_position = self.expression( 6648 exp.ColumnPosition, this=self._parse_column(), position=position 6649 ) 6650 expression.set("position", column_position) 6651 6652 return expression 6653 6654 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 6655 drop = self._match(TokenType.DROP) and self._parse_drop() 6656 if drop and not isinstance(drop, exp.Command): 6657 drop.set("kind", drop.args.get("kind", "COLUMN")) 6658 return drop 6659 6660 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 6661 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 6662 return self.expression( 6663 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 6664 ) 6665 6666 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 6667 index = self._index - 1 6668 6669 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 6670 return self._parse_csv( 6671 lambda: self.expression( 6672 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 6673 ) 6674 ) 6675 6676 self._retreat(index) 6677 if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"): 6678 return self._parse_wrapped_csv(self._parse_field_def, optional=True) 6679 6680 if self._match_text_seq("ADD", "COLUMNS"): 6681 schema = self._parse_schema() 6682 if schema: 6683 return [schema] 6684 return [] 6685 6686 return self._parse_wrapped_csv(self._parse_add_column, optional=True) 6687 6688 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 6689 if self._match_texts(self.ALTER_ALTER_PARSERS): 6690 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 6691 6692 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 6693 # keyword after ALTER we default to parsing this statement 6694 self._match(TokenType.COLUMN) 6695 column = self._parse_field(any_token=True) 6696 6697 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 6698 return self.expression(exp.AlterColumn, this=column, drop=True) 6699 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 6700 return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment()) 6701 if self._match(TokenType.COMMENT): 6702 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 6703 if self._match_text_seq("DROP", "NOT", "NULL"): 6704 return self.expression( 6705 exp.AlterColumn, 6706 this=column, 6707 drop=True, 6708 allow_null=True, 6709 ) 6710 if self._match_text_seq("SET", "NOT", "NULL"): 6711 return self.expression( 6712 exp.AlterColumn, 6713 this=column, 6714 allow_null=False, 6715 ) 6716 self._match_text_seq("SET", "DATA") 6717 self._match_text_seq("TYPE") 6718 return self.expression( 6719 exp.AlterColumn, 6720 this=column, 6721 dtype=self._parse_types(), 6722 collate=self._match(TokenType.COLLATE) and self._parse_term(), 6723 using=self._match(TokenType.USING) and 
self._parse_assignment(), 6724 ) 6725 6726 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 6727 if self._match_texts(("ALL", "EVEN", "AUTO")): 6728 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 6729 6730 self._match_text_seq("KEY", "DISTKEY") 6731 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 6732 6733 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 6734 if compound: 6735 self._match_text_seq("SORTKEY") 6736 6737 if self._match(TokenType.L_PAREN, advance=False): 6738 return self.expression( 6739 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 6740 ) 6741 6742 self._match_texts(("AUTO", "NONE")) 6743 return self.expression( 6744 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 6745 ) 6746 6747 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 6748 index = self._index - 1 6749 6750 partition_exists = self._parse_exists() 6751 if self._match(TokenType.PARTITION, advance=False): 6752 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 6753 6754 self._retreat(index) 6755 return self._parse_csv(self._parse_drop_column) 6756 6757 def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]: 6758 if self._match(TokenType.COLUMN): 6759 exists = self._parse_exists() 6760 old_column = self._parse_column() 6761 to = self._match_text_seq("TO") 6762 new_column = self._parse_column() 6763 6764 if old_column is None or to is None or new_column is None: 6765 return None 6766 6767 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 6768 6769 self._match_text_seq("TO") 6770 return self.expression(exp.RenameTable, this=self._parse_table(schema=True)) 6771 6772 def _parse_alter_table_set(self) -> exp.AlterSet: 6773 alter_set = self.expression(exp.AlterSet) 6774 6775 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 6776 "TABLE", "PROPERTIES" 6777 ): 6778 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 6779 elif self._match_text_seq("FILESTREAM_ON", advance=False): 6780 alter_set.set("expressions", [self._parse_assignment()]) 6781 elif self._match_texts(("LOGGED", "UNLOGGED")): 6782 alter_set.set("option", exp.var(self._prev.text.upper())) 6783 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 6784 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 6785 elif self._match_text_seq("LOCATION"): 6786 alter_set.set("location", self._parse_field()) 6787 elif self._match_text_seq("ACCESS", "METHOD"): 6788 alter_set.set("access_method", self._parse_field()) 6789 elif self._match_text_seq("TABLESPACE"): 6790 alter_set.set("tablespace", self._parse_field()) 6791 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 6792 alter_set.set("file_format", [self._parse_field()]) 6793 elif self._match_text_seq("STAGE_FILE_FORMAT"): 6794 alter_set.set("file_format", self._parse_wrapped_options()) 6795 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 6796 alter_set.set("copy_options", self._parse_wrapped_options()) 6797 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 6798 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 6799 else: 6800 if self._match_text_seq("SERDE"): 6801 alter_set.set("serde", self._parse_field()) 6802 6803 alter_set.set("expressions", [self._parse_properties()]) 6804 6805 return 
alter_set 6806 6807 def _parse_alter(self) -> exp.Alter | exp.Command: 6808 start = self._prev 6809 6810 alter_token = self._match_set(self.ALTERABLES) and self._prev 6811 if not alter_token: 6812 return self._parse_as_command(start) 6813 6814 exists = self._parse_exists() 6815 only = self._match_text_seq("ONLY") 6816 this = self._parse_table(schema=True) 6817 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 6818 6819 if self._next: 6820 self._advance() 6821 6822 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 6823 if parser: 6824 actions = ensure_list(parser(self)) 6825 not_valid = self._match_text_seq("NOT", "VALID") 6826 options = self._parse_csv(self._parse_property) 6827 6828 if not self._curr and actions: 6829 return self.expression( 6830 exp.Alter, 6831 this=this, 6832 kind=alter_token.text.upper(), 6833 exists=exists, 6834 actions=actions, 6835 only=only, 6836 options=options, 6837 cluster=cluster, 6838 not_valid=not_valid, 6839 ) 6840 6841 return self._parse_as_command(start) 6842 6843 def _parse_merge(self) -> exp.Merge: 6844 self._match(TokenType.INTO) 6845 target = self._parse_table() 6846 6847 if target and self._match(TokenType.ALIAS, advance=False): 6848 target.set("alias", self._parse_table_alias()) 6849 6850 self._match(TokenType.USING) 6851 using = self._parse_table() 6852 6853 self._match(TokenType.ON) 6854 on = self._parse_assignment() 6855 6856 return self.expression( 6857 exp.Merge, 6858 this=target, 6859 using=using, 6860 on=on, 6861 expressions=self._parse_when_matched(), 6862 returning=self._parse_returning(), 6863 ) 6864 6865 def _parse_when_matched(self) -> t.List[exp.When]: 6866 whens = [] 6867 6868 while self._match(TokenType.WHEN): 6869 matched = not self._match(TokenType.NOT) 6870 self._match_text_seq("MATCHED") 6871 source = ( 6872 False 6873 if self._match_text_seq("BY", "TARGET") 6874 else self._match_text_seq("BY", "SOURCE") 6875 ) 6876 condition = self._parse_assignment() if self._match(TokenType.AND) else None 6877 6878 self._match(TokenType.THEN) 6879 6880 if self._match(TokenType.INSERT): 6881 this = self._parse_star() 6882 if this: 6883 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 6884 else: 6885 then = self.expression( 6886 exp.Insert, 6887 this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(), 6888 expression=self._match_text_seq("VALUES") and self._parse_value(), 6889 ) 6890 elif self._match(TokenType.UPDATE): 6891 expressions = self._parse_star() 6892 if expressions: 6893 then = self.expression(exp.Update, expressions=expressions) 6894 else: 6895 then = self.expression( 6896 exp.Update, 6897 expressions=self._match(TokenType.SET) 6898 and self._parse_csv(self._parse_equality), 6899 ) 6900 elif self._match(TokenType.DELETE): 6901 then = self.expression(exp.Var, this=self._prev.text) 6902 else: 6903 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 6904 6905 whens.append( 6906 self.expression( 6907 exp.When, 6908 matched=matched, 6909 source=source, 6910 condition=condition, 6911 then=then, 6912 ) 6913 ) 6914 return whens 6915 6916 def _parse_show(self) -> t.Optional[exp.Expression]: 6917 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 6918 if parser: 6919 return parser(self) 6920 return self._parse_as_command(self._prev) 6921 6922 def _parse_set_item_assignment( 6923 self, kind: t.Optional[str] = None 6924 ) -> t.Optional[exp.Expression]: 6925 index = self._index 6926 6927 if kind in ("GLOBAL", "SESSION") and 
self._match_text_seq("TRANSACTION"): 6928 return self._parse_set_transaction(global_=kind == "GLOBAL") 6929 6930 left = self._parse_primary() or self._parse_column() 6931 assignment_delimiter = self._match_texts(("=", "TO")) 6932 6933 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 6934 self._retreat(index) 6935 return None 6936 6937 right = self._parse_statement() or self._parse_id_var() 6938 if isinstance(right, (exp.Column, exp.Identifier)): 6939 right = exp.var(right.name) 6940 6941 this = self.expression(exp.EQ, this=left, expression=right) 6942 return self.expression(exp.SetItem, this=this, kind=kind) 6943 6944 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 6945 self._match_text_seq("TRANSACTION") 6946 characteristics = self._parse_csv( 6947 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 6948 ) 6949 return self.expression( 6950 exp.SetItem, 6951 expressions=characteristics, 6952 kind="TRANSACTION", 6953 **{"global": global_}, # type: ignore 6954 ) 6955 6956 def _parse_set_item(self) -> t.Optional[exp.Expression]: 6957 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 6958 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 6959 6960 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 6961 index = self._index 6962 set_ = self.expression( 6963 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 6964 ) 6965 6966 if self._curr: 6967 self._retreat(index) 6968 return self._parse_as_command(self._prev) 6969 6970 return set_ 6971 6972 def _parse_var_from_options( 6973 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 6974 ) -> t.Optional[exp.Var]: 6975 start = self._curr 6976 if not start: 6977 return None 6978 6979 option = start.text.upper() 6980 continuations = options.get(option) 6981 6982 index = self._index 6983 self._advance() 6984 for keywords in continuations or []: 6985 if isinstance(keywords, str): 6986 keywords = (keywords,) 6987 6988 if self._match_text_seq(*keywords): 6989 option = f"{option} {' '.join(keywords)}" 6990 break 6991 else: 6992 if continuations or continuations is None: 6993 if raise_unmatched: 6994 self.raise_error(f"Unknown option {option}") 6995 6996 self._retreat(index) 6997 return None 6998 6999 return exp.var(option) 7000 7001 def _parse_as_command(self, start: Token) -> exp.Command: 7002 while self._curr: 7003 self._advance() 7004 text = self._find_sql(start, self._prev) 7005 size = len(start.text) 7006 self._warn_unsupported() 7007 return exp.Command(this=text[:size], expression=text[size:]) 7008 7009 def _parse_dict_property(self, this: str) -> exp.DictProperty: 7010 settings = [] 7011 7012 self._match_l_paren() 7013 kind = self._parse_id_var() 7014 7015 if self._match(TokenType.L_PAREN): 7016 while True: 7017 key = self._parse_id_var() 7018 value = self._parse_primary() 7019 7020 if not key and value is None: 7021 break 7022 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 7023 self._match(TokenType.R_PAREN) 7024 7025 self._match_r_paren() 7026 7027 return self.expression( 7028 exp.DictProperty, 7029 this=this, 7030 kind=kind.this if kind else None, 7031 settings=settings, 7032 ) 7033 7034 def _parse_dict_range(self, this: str) -> exp.DictRange: 7035 self._match_l_paren() 7036 has_min = self._match_text_seq("MIN") 7037 if has_min: 7038 min = self._parse_var() or self._parse_primary() 7039 self._match_text_seq("MAX") 7040 max = 
self._parse_var() or self._parse_primary() 7041 else: 7042 max = self._parse_var() or self._parse_primary() 7043 min = exp.Literal.number(0) 7044 self._match_r_paren() 7045 return self.expression(exp.DictRange, this=this, min=min, max=max) 7046 7047 def _parse_comprehension( 7048 self, this: t.Optional[exp.Expression] 7049 ) -> t.Optional[exp.Comprehension]: 7050 index = self._index 7051 expression = self._parse_column() 7052 if not self._match(TokenType.IN): 7053 self._retreat(index - 1) 7054 return None 7055 iterator = self._parse_column() 7056 condition = self._parse_assignment() if self._match_text_seq("IF") else None 7057 return self.expression( 7058 exp.Comprehension, 7059 this=this, 7060 expression=expression, 7061 iterator=iterator, 7062 condition=condition, 7063 ) 7064 7065 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 7066 if self._match(TokenType.HEREDOC_STRING): 7067 return self.expression(exp.Heredoc, this=self._prev.text) 7068 7069 if not self._match_text_seq("$"): 7070 return None 7071 7072 tags = ["$"] 7073 tag_text = None 7074 7075 if self._is_connected(): 7076 self._advance() 7077 tags.append(self._prev.text.upper()) 7078 else: 7079 self.raise_error("No closing $ found") 7080 7081 if tags[-1] != "$": 7082 if self._is_connected() and self._match_text_seq("$"): 7083 tag_text = tags[-1] 7084 tags.append("$") 7085 else: 7086 self.raise_error("No closing $ found") 7087 7088 heredoc_start = self._curr 7089 7090 while self._curr: 7091 if self._match_text_seq(*tags, advance=False): 7092 this = self._find_sql(heredoc_start, self._prev) 7093 self._advance(len(tags)) 7094 return self.expression(exp.Heredoc, this=this, tag=tag_text) 7095 7096 self._advance() 7097 7098 self.raise_error(f"No closing {''.join(tags)} found") 7099 return None 7100 7101 def _find_parser( 7102 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 7103 ) -> t.Optional[t.Callable]: 7104 if not self._curr: 7105 return None 7106 7107 index = self._index 7108 this = [] 7109 while True: 7110 # The current token might be multiple words 7111 curr = self._curr.text.upper() 7112 key = curr.split(" ") 7113 this.append(curr) 7114 7115 self._advance() 7116 result, trie = in_trie(trie, key) 7117 if result == TrieResult.FAILED: 7118 break 7119 7120 if result == TrieResult.EXISTS: 7121 subparser = parsers[" ".join(this)] 7122 return subparser 7123 7124 self._retreat(index) 7125 return None 7126 7127 def _match(self, token_type, advance=True, expression=None): 7128 if not self._curr: 7129 return None 7130 7131 if self._curr.token_type == token_type: 7132 if advance: 7133 self._advance() 7134 self._add_comments(expression) 7135 return True 7136 7137 return None 7138 7139 def _match_set(self, types, advance=True): 7140 if not self._curr: 7141 return None 7142 7143 if self._curr.token_type in types: 7144 if advance: 7145 self._advance() 7146 return True 7147 7148 return None 7149 7150 def _match_pair(self, token_type_a, token_type_b, advance=True): 7151 if not self._curr or not self._next: 7152 return None 7153 7154 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 7155 if advance: 7156 self._advance(2) 7157 return True 7158 7159 return None 7160 7161 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7162 if not self._match(TokenType.L_PAREN, expression=expression): 7163 self.raise_error("Expecting (") 7164 7165 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 7166 if not self._match(TokenType.R_PAREN, expression=expression): 
7167 self.raise_error("Expecting )") 7168 7169 def _match_texts(self, texts, advance=True): 7170 if ( 7171 self._curr 7172 and self._curr.token_type != TokenType.STRING 7173 and self._curr.text.upper() in texts 7174 ): 7175 if advance: 7176 self._advance() 7177 return True 7178 return None 7179 7180 def _match_text_seq(self, *texts, advance=True): 7181 index = self._index 7182 for text in texts: 7183 if ( 7184 self._curr 7185 and self._curr.token_type != TokenType.STRING 7186 and self._curr.text.upper() == text 7187 ): 7188 self._advance() 7189 else: 7190 self._retreat(index) 7191 return None 7192 7193 if not advance: 7194 self._retreat(index) 7195 7196 return True 7197 7198 def _replace_lambda( 7199 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 7200 ) -> t.Optional[exp.Expression]: 7201 if not node: 7202 return node 7203 7204 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 7205 7206 for column in node.find_all(exp.Column): 7207 typ = lambda_types.get(column.parts[0].name) 7208 if typ is not None: 7209 dot_or_id = column.to_dot() if column.table else column.this 7210 7211 if typ: 7212 dot_or_id = self.expression( 7213 exp.Cast, 7214 this=dot_or_id, 7215 to=typ, 7216 ) 7217 7218 parent = column.parent 7219 7220 while isinstance(parent, exp.Dot): 7221 if not isinstance(parent.parent, exp.Dot): 7222 parent.replace(dot_or_id) 7223 break 7224 parent = parent.parent 7225 else: 7226 if column is node: 7227 node = dot_or_id 7228 else: 7229 column.replace(dot_or_id) 7230 return node 7231 7232 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 7233 start = self._prev 7234 7235 # Not to be confused with TRUNCATE(number, decimals) function call 7236 if self._match(TokenType.L_PAREN): 7237 self._retreat(self._index - 2) 7238 return self._parse_function() 7239 7240 # Clickhouse supports TRUNCATE DATABASE as well 7241 is_database = self._match(TokenType.DATABASE) 7242 7243 self._match(TokenType.TABLE) 7244 7245 exists = self._parse_exists(not_=False) 7246 7247 expressions = self._parse_csv( 7248 lambda: self._parse_table(schema=True, is_db_reference=is_database) 7249 ) 7250 7251 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7252 7253 if self._match_text_seq("RESTART", "IDENTITY"): 7254 identity = "RESTART" 7255 elif self._match_text_seq("CONTINUE", "IDENTITY"): 7256 identity = "CONTINUE" 7257 else: 7258 identity = None 7259 7260 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 7261 option = self._prev.text 7262 else: 7263 option = None 7264 7265 partition = self._parse_partition() 7266 7267 # Fallback case 7268 if self._curr: 7269 return self._parse_as_command(start) 7270 7271 return self.expression( 7272 exp.TruncateTable, 7273 expressions=expressions, 7274 is_database=is_database, 7275 exists=exists, 7276 cluster=cluster, 7277 identity=identity, 7278 option=option, 7279 partition=partition, 7280 ) 7281 7282 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 7283 this = self._parse_ordered(self._parse_opclass) 7284 7285 if not self._match(TokenType.WITH): 7286 return this 7287 7288 op = self._parse_var(any_token=True) 7289 7290 return self.expression(exp.WithOperator, this=this, op=op) 7291 7292 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 7293 self._match(TokenType.EQ) 7294 self._match(TokenType.L_PAREN) 7295 7296 opts: t.List[t.Optional[exp.Expression]] = [] 7297 while self._curr and not self._match(TokenType.R_PAREN): 7298 if 
self._match_text_seq("FORMAT_NAME", "="): 7299 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL, 7300 # so we parse it separately to use _parse_field() 7301 prop = self.expression( 7302 exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field() 7303 ) 7304 opts.append(prop) 7305 else: 7306 opts.append(self._parse_property()) 7307 7308 self._match(TokenType.COMMA) 7309 7310 return opts 7311 7312 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 7313 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 7314 7315 options = [] 7316 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 7317 option = self._parse_var(any_token=True) 7318 prev = self._prev.text.upper() 7319 7320 # Different dialects might separate options and values by white space, "=" and "AS" 7321 self._match(TokenType.EQ) 7322 self._match(TokenType.ALIAS) 7323 7324 param = self.expression(exp.CopyParameter, this=option) 7325 7326 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 7327 TokenType.L_PAREN, advance=False 7328 ): 7329 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 7330 param.set("expressions", self._parse_wrapped_options()) 7331 elif prev == "FILE_FORMAT": 7332 # T-SQL's external file format case 7333 param.set("expression", self._parse_field()) 7334 else: 7335 param.set("expression", self._parse_unquoted_field()) 7336 7337 options.append(param) 7338 self._match(sep) 7339 7340 return options 7341 7342 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 7343 expr = self.expression(exp.Credentials) 7344 7345 if self._match_text_seq("STORAGE_INTEGRATION", "="): 7346 expr.set("storage", self._parse_field()) 7347 if self._match_text_seq("CREDENTIALS"): 7348 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 7349 creds = ( 7350 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 7351 ) 7352 expr.set("credentials", creds) 7353 if self._match_text_seq("ENCRYPTION"): 7354 expr.set("encryption", self._parse_wrapped_options()) 7355 if self._match_text_seq("IAM_ROLE"): 7356 expr.set("iam_role", self._parse_field()) 7357 if self._match_text_seq("REGION"): 7358 expr.set("region", self._parse_field()) 7359 7360 return expr 7361 7362 def _parse_file_location(self) -> t.Optional[exp.Expression]: 7363 return self._parse_field() 7364 7365 def _parse_copy(self) -> exp.Copy | exp.Command: 7366 start = self._prev 7367 7368 self._match(TokenType.INTO) 7369 7370 this = ( 7371 self._parse_select(nested=True, parse_subquery_alias=False) 7372 if self._match(TokenType.L_PAREN, advance=False) 7373 else self._parse_table(schema=True) 7374 ) 7375 7376 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 7377 7378 files = self._parse_csv(self._parse_file_location) 7379 credentials = self._parse_credentials() 7380 7381 self._match_text_seq("WITH") 7382 7383 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 7384 7385 # Fallback case 7386 if self._curr: 7387 return self._parse_as_command(start) 7388 7389 return self.expression( 7390 exp.Copy, 7391 this=this, 7392 kind=kind, 7393 credentials=credentials, 7394 files=files, 7395 params=params, 7396 ) 7397 7398 def _parse_normalize(self) -> exp.Normalize: 7399 return self.expression( 7400 exp.Normalize, 7401 this=self._parse_bitwise(), 7402 form=self._match(TokenType.COMMA) and self._parse_var(), 7403 ) 7404 7405 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 7406 if 
self._match_text_seq("COLUMNS", "(", advance=False): 7407 this = self._parse_function() 7408 if isinstance(this, exp.Columns): 7409 this.set("unpack", True) 7410 return this 7411 7412 return self.expression( 7413 exp.Star, 7414 **{ # type: ignore 7415 "except": self._parse_star_op("EXCEPT", "EXCLUDE"), 7416 "replace": self._parse_star_op("REPLACE"), 7417 "rename": self._parse_star_op("RENAME"), 7418 }, 7419 ) 7420 7421 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 7422 privilege_parts = [] 7423 7424 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 7425 # (end of privilege list) or L_PAREN (start of column list) are met 7426 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 7427 privilege_parts.append(self._curr.text.upper()) 7428 self._advance() 7429 7430 this = exp.var(" ".join(privilege_parts)) 7431 expressions = ( 7432 self._parse_wrapped_csv(self._parse_column) 7433 if self._match(TokenType.L_PAREN, advance=False) 7434 else None 7435 ) 7436 7437 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 7438 7439 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 7440 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 7441 principal = self._parse_id_var() 7442 7443 if not principal: 7444 return None 7445 7446 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 7447 7448 def _parse_grant(self) -> exp.Grant | exp.Command: 7449 start = self._prev 7450 7451 privileges = self._parse_csv(self._parse_grant_privilege) 7452 7453 self._match(TokenType.ON) 7454 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 7455 7456 # Attempt to parse the securable e.g. MySQL allows names 7457 # such as "foo.*", "*.*" which are not easily parseable yet 7458 securable = self._try_parse(self._parse_table_parts) 7459 7460 if not securable or not self._match_text_seq("TO"): 7461 return self._parse_as_command(start) 7462 7463 principals = self._parse_csv(self._parse_grant_principal) 7464 7465 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 7466 7467 if self._curr: 7468 return self._parse_as_command(start) 7469 7470 return self.expression( 7471 exp.Grant, 7472 privileges=privileges, 7473 kind=kind, 7474 securable=securable, 7475 principals=principals, 7476 grant_option=grant_option, 7477 ) 7478 7479 def _parse_overlay(self) -> exp.Overlay: 7480 return self.expression( 7481 exp.Overlay, 7482 **{ # type: ignore 7483 "this": self._parse_bitwise(), 7484 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 7485 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 7486 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 7487 }, 7488 )
166class Parser(metaclass=_Parser): 167 """ 168 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 169 170 Args: 171 error_level: The desired error level. 172 Default: ErrorLevel.IMMEDIATE 173 error_message_context: The amount of context to capture from a query string when displaying 174 the error message (in number of characters). 175 Default: 100 176 max_errors: Maximum number of error messages to include in a raised ParseError. 177 This is only relevant if error_level is ErrorLevel.RAISE. 178 Default: 3 179 """ 180 181 FUNCTIONS: t.Dict[str, t.Callable] = { 182 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 183 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 184 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 185 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 186 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 187 ), 188 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 189 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 190 ), 191 "CHAR": lambda args: exp.Chr(expressions=args), 192 "CHR": lambda args: exp.Chr(expressions=args), 193 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 194 "CONCAT": lambda args, dialect: exp.Concat( 195 expressions=args, 196 safe=not dialect.STRICT_STRING_CONCAT, 197 coalesce=dialect.CONCAT_COALESCE, 198 ), 199 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 200 expressions=args, 201 safe=not dialect.STRICT_STRING_CONCAT, 202 coalesce=dialect.CONCAT_COALESCE, 203 ), 204 "CONVERT_TIMEZONE": build_convert_timezone, 205 "DATE_TO_DATE_STR": lambda args: exp.Cast( 206 this=seq_get(args, 0), 207 to=exp.DataType(this=exp.DataType.Type.TEXT), 208 ), 209 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 210 start=seq_get(args, 0), 211 end=seq_get(args, 1), 212 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.number(1), unit=exp.var("DAY")), 213 ), 214 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 215 "HEX": build_hex, 216 "INSTR": lambda args: exp.StrPosition(this=seq_get(args, 0), substr=seq_get(args, 1)), 217 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 218 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 219 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 220 "LIKE": build_like, 221 "LOG": build_logarithm, 222 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 223 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 224 "LOWER": build_lower, 225 "LPAD": lambda args: build_pad(args), 226 "LEFTPAD": lambda args: build_pad(args), 227 "LTRIM": lambda args: build_trim(args), 228 "MOD": build_mod, 229 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 230 "RPAD": lambda args: build_pad(args, is_left=False), 231 "RTRIM": lambda args: build_trim(args, is_left=False), 232 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 233 if len(args) != 2 234 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 235 "TIME_TO_TIME_STR": lambda args: exp.Cast( 236 this=seq_get(args, 0), 237 to=exp.DataType(this=exp.DataType.Type.TEXT), 238 ), 239 "TO_HEX": build_hex, 240 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 241 this=exp.Cast( 242 this=seq_get(args, 0), 243 
to=exp.DataType(this=exp.DataType.Type.TEXT), 244 ), 245 start=exp.Literal.number(1), 246 length=exp.Literal.number(10), 247 ), 248 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 249 "UPPER": build_upper, 250 "VAR_MAP": build_var_map, 251 } 252 253 NO_PAREN_FUNCTIONS = { 254 TokenType.CURRENT_DATE: exp.CurrentDate, 255 TokenType.CURRENT_DATETIME: exp.CurrentDate, 256 TokenType.CURRENT_TIME: exp.CurrentTime, 257 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 258 TokenType.CURRENT_USER: exp.CurrentUser, 259 } 260 261 STRUCT_TYPE_TOKENS = { 262 TokenType.NESTED, 263 TokenType.OBJECT, 264 TokenType.STRUCT, 265 TokenType.UNION, 266 } 267 268 NESTED_TYPE_TOKENS = { 269 TokenType.ARRAY, 270 TokenType.LIST, 271 TokenType.LOWCARDINALITY, 272 TokenType.MAP, 273 TokenType.NULLABLE, 274 TokenType.RANGE, 275 *STRUCT_TYPE_TOKENS, 276 } 277 278 ENUM_TYPE_TOKENS = { 279 TokenType.ENUM, 280 TokenType.ENUM8, 281 TokenType.ENUM16, 282 } 283 284 AGGREGATE_TYPE_TOKENS = { 285 TokenType.AGGREGATEFUNCTION, 286 TokenType.SIMPLEAGGREGATEFUNCTION, 287 } 288 289 TYPE_TOKENS = { 290 TokenType.BIT, 291 TokenType.BOOLEAN, 292 TokenType.TINYINT, 293 TokenType.UTINYINT, 294 TokenType.SMALLINT, 295 TokenType.USMALLINT, 296 TokenType.INT, 297 TokenType.UINT, 298 TokenType.BIGINT, 299 TokenType.UBIGINT, 300 TokenType.INT128, 301 TokenType.UINT128, 302 TokenType.INT256, 303 TokenType.UINT256, 304 TokenType.MEDIUMINT, 305 TokenType.UMEDIUMINT, 306 TokenType.FIXEDSTRING, 307 TokenType.FLOAT, 308 TokenType.DOUBLE, 309 TokenType.CHAR, 310 TokenType.NCHAR, 311 TokenType.VARCHAR, 312 TokenType.NVARCHAR, 313 TokenType.BPCHAR, 314 TokenType.TEXT, 315 TokenType.MEDIUMTEXT, 316 TokenType.LONGTEXT, 317 TokenType.MEDIUMBLOB, 318 TokenType.LONGBLOB, 319 TokenType.BINARY, 320 TokenType.VARBINARY, 321 TokenType.JSON, 322 TokenType.JSONB, 323 TokenType.INTERVAL, 324 TokenType.TINYBLOB, 325 TokenType.TINYTEXT, 326 TokenType.TIME, 327 TokenType.TIMETZ, 328 TokenType.TIMESTAMP, 329 TokenType.TIMESTAMP_S, 330 TokenType.TIMESTAMP_MS, 331 TokenType.TIMESTAMP_NS, 332 TokenType.TIMESTAMPTZ, 333 TokenType.TIMESTAMPLTZ, 334 TokenType.TIMESTAMPNTZ, 335 TokenType.DATETIME, 336 TokenType.DATETIME64, 337 TokenType.DATE, 338 TokenType.DATE32, 339 TokenType.INT4RANGE, 340 TokenType.INT4MULTIRANGE, 341 TokenType.INT8RANGE, 342 TokenType.INT8MULTIRANGE, 343 TokenType.NUMRANGE, 344 TokenType.NUMMULTIRANGE, 345 TokenType.TSRANGE, 346 TokenType.TSMULTIRANGE, 347 TokenType.TSTZRANGE, 348 TokenType.TSTZMULTIRANGE, 349 TokenType.DATERANGE, 350 TokenType.DATEMULTIRANGE, 351 TokenType.DECIMAL, 352 TokenType.DECIMAL32, 353 TokenType.DECIMAL64, 354 TokenType.DECIMAL128, 355 TokenType.UDECIMAL, 356 TokenType.BIGDECIMAL, 357 TokenType.UUID, 358 TokenType.GEOGRAPHY, 359 TokenType.GEOMETRY, 360 TokenType.HLLSKETCH, 361 TokenType.HSTORE, 362 TokenType.PSEUDO_TYPE, 363 TokenType.SUPER, 364 TokenType.SERIAL, 365 TokenType.SMALLSERIAL, 366 TokenType.BIGSERIAL, 367 TokenType.XML, 368 TokenType.YEAR, 369 TokenType.UNIQUEIDENTIFIER, 370 TokenType.USERDEFINED, 371 TokenType.MONEY, 372 TokenType.SMALLMONEY, 373 TokenType.ROWVERSION, 374 TokenType.IMAGE, 375 TokenType.VARIANT, 376 TokenType.VECTOR, 377 TokenType.OBJECT, 378 TokenType.OBJECT_IDENTIFIER, 379 TokenType.INET, 380 TokenType.IPADDRESS, 381 TokenType.IPPREFIX, 382 TokenType.IPV4, 383 TokenType.IPV6, 384 TokenType.UNKNOWN, 385 TokenType.NULL, 386 TokenType.NAME, 387 TokenType.TDIGEST, 388 *ENUM_TYPE_TOKENS, 389 *NESTED_TYPE_TOKENS, 390 *AGGREGATE_TYPE_TOKENS, 391 } 392 393 
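# A small, hedged illustration of the FUNCTIONS table above (default dialect,
# sqlglot installed): the builders canonicalize different spellings of a
# function into one node shape, which is what makes transpilation possible.
import sqlglot
from sqlglot import exp

# LOG2/LOG10 are rewritten to exp.Log with an explicit literal base.
log2 = sqlglot.parse_one("SELECT LOG2(x)").find(exp.Log)
assert log2 is not None and log2.this.name == "2"

# COALESCE, IFNULL and NVL all route through build_coalesce, so each one
# produces the same exp.Coalesce node.
assert sqlglot.parse_one("SELECT IFNULL(x, 0)").find(exp.Coalesce) is not None
assert sqlglot.parse_one("SELECT NVL(x, 0)").find(exp.Coalesce) is not None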
SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 394 TokenType.BIGINT: TokenType.UBIGINT, 395 TokenType.INT: TokenType.UINT, 396 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 397 TokenType.SMALLINT: TokenType.USMALLINT, 398 TokenType.TINYINT: TokenType.UTINYINT, 399 TokenType.DECIMAL: TokenType.UDECIMAL, 400 } 401 402 SUBQUERY_PREDICATES = { 403 TokenType.ANY: exp.Any, 404 TokenType.ALL: exp.All, 405 TokenType.EXISTS: exp.Exists, 406 TokenType.SOME: exp.Any, 407 } 408 409 RESERVED_TOKENS = { 410 *Tokenizer.SINGLE_TOKENS.values(), 411 TokenType.SELECT, 412 } - {TokenType.IDENTIFIER} 413 414 DB_CREATABLES = { 415 TokenType.DATABASE, 416 TokenType.DICTIONARY, 417 TokenType.MODEL, 418 TokenType.SCHEMA, 419 TokenType.SEQUENCE, 420 TokenType.STORAGE_INTEGRATION, 421 TokenType.TABLE, 422 TokenType.TAG, 423 TokenType.VIEW, 424 TokenType.WAREHOUSE, 425 TokenType.STREAMLIT, 426 } 427 428 CREATABLES = { 429 TokenType.COLUMN, 430 TokenType.CONSTRAINT, 431 TokenType.FOREIGN_KEY, 432 TokenType.FUNCTION, 433 TokenType.INDEX, 434 TokenType.PROCEDURE, 435 *DB_CREATABLES, 436 } 437 438 ALTERABLES = { 439 TokenType.INDEX, 440 TokenType.TABLE, 441 TokenType.VIEW, 442 } 443 444 # Tokens that can represent identifiers 445 ID_VAR_TOKENS = { 446 TokenType.ALL, 447 TokenType.VAR, 448 TokenType.ANTI, 449 TokenType.APPLY, 450 TokenType.ASC, 451 TokenType.ASOF, 452 TokenType.AUTO_INCREMENT, 453 TokenType.BEGIN, 454 TokenType.BPCHAR, 455 TokenType.CACHE, 456 TokenType.CASE, 457 TokenType.COLLATE, 458 TokenType.COMMAND, 459 TokenType.COMMENT, 460 TokenType.COMMIT, 461 TokenType.CONSTRAINT, 462 TokenType.COPY, 463 TokenType.CUBE, 464 TokenType.DEFAULT, 465 TokenType.DELETE, 466 TokenType.DESC, 467 TokenType.DESCRIBE, 468 TokenType.DICTIONARY, 469 TokenType.DIV, 470 TokenType.END, 471 TokenType.EXECUTE, 472 TokenType.ESCAPE, 473 TokenType.FALSE, 474 TokenType.FIRST, 475 TokenType.FILTER, 476 TokenType.FINAL, 477 TokenType.FORMAT, 478 TokenType.FULL, 479 TokenType.IDENTIFIER, 480 TokenType.IS, 481 TokenType.ISNULL, 482 TokenType.INTERVAL, 483 TokenType.KEEP, 484 TokenType.KILL, 485 TokenType.LEFT, 486 TokenType.LOAD, 487 TokenType.MERGE, 488 TokenType.NATURAL, 489 TokenType.NEXT, 490 TokenType.OFFSET, 491 TokenType.OPERATOR, 492 TokenType.ORDINALITY, 493 TokenType.OVERLAPS, 494 TokenType.OVERWRITE, 495 TokenType.PARTITION, 496 TokenType.PERCENT, 497 TokenType.PIVOT, 498 TokenType.PRAGMA, 499 TokenType.RANGE, 500 TokenType.RECURSIVE, 501 TokenType.REFERENCES, 502 TokenType.REFRESH, 503 TokenType.RENAME, 504 TokenType.REPLACE, 505 TokenType.RIGHT, 506 TokenType.ROLLUP, 507 TokenType.ROW, 508 TokenType.ROWS, 509 TokenType.SEMI, 510 TokenType.SET, 511 TokenType.SETTINGS, 512 TokenType.SHOW, 513 TokenType.TEMPORARY, 514 TokenType.TOP, 515 TokenType.TRUE, 516 TokenType.TRUNCATE, 517 TokenType.UNIQUE, 518 TokenType.UNNEST, 519 TokenType.UNPIVOT, 520 TokenType.UPDATE, 521 TokenType.USE, 522 TokenType.VOLATILE, 523 TokenType.WINDOW, 524 *CREATABLES, 525 *SUBQUERY_PREDICATES, 526 *TYPE_TOKENS, 527 *NO_PAREN_FUNCTIONS, 528 } 529 ID_VAR_TOKENS.remove(TokenType.UNION) 530 531 INTERVAL_VARS = ID_VAR_TOKENS - {TokenType.END} 532 533 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 534 TokenType.ANTI, 535 TokenType.APPLY, 536 TokenType.ASOF, 537 TokenType.FULL, 538 TokenType.LEFT, 539 TokenType.LOCK, 540 TokenType.NATURAL, 541 TokenType.OFFSET, 542 TokenType.RIGHT, 543 TokenType.SEMI, 544 TokenType.WINDOW, 545 } 546 547 ALIAS_TOKENS = ID_VAR_TOKENS 548 549 ARRAY_CONSTRUCTORS = { 550 "ARRAY": exp.Array, 551 "LIST": exp.List, 552 } 553 554 COMMENT_TABLE_ALIAS_TOKENS 
= TABLE_ALIAS_TOKENS - {TokenType.IS} 555 556 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 557 558 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 559 560 FUNC_TOKENS = { 561 TokenType.COLLATE, 562 TokenType.COMMAND, 563 TokenType.CURRENT_DATE, 564 TokenType.CURRENT_DATETIME, 565 TokenType.CURRENT_TIMESTAMP, 566 TokenType.CURRENT_TIME, 567 TokenType.CURRENT_USER, 568 TokenType.FILTER, 569 TokenType.FIRST, 570 TokenType.FORMAT, 571 TokenType.GLOB, 572 TokenType.IDENTIFIER, 573 TokenType.INDEX, 574 TokenType.ISNULL, 575 TokenType.ILIKE, 576 TokenType.INSERT, 577 TokenType.LIKE, 578 TokenType.MERGE, 579 TokenType.OFFSET, 580 TokenType.PRIMARY_KEY, 581 TokenType.RANGE, 582 TokenType.REPLACE, 583 TokenType.RLIKE, 584 TokenType.ROW, 585 TokenType.UNNEST, 586 TokenType.VAR, 587 TokenType.LEFT, 588 TokenType.RIGHT, 589 TokenType.SEQUENCE, 590 TokenType.DATE, 591 TokenType.DATETIME, 592 TokenType.TABLE, 593 TokenType.TIMESTAMP, 594 TokenType.TIMESTAMPTZ, 595 TokenType.TRUNCATE, 596 TokenType.WINDOW, 597 TokenType.XOR, 598 *TYPE_TOKENS, 599 *SUBQUERY_PREDICATES, 600 } 601 602 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 603 TokenType.AND: exp.And, 604 } 605 606 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 607 TokenType.COLON_EQ: exp.PropertyEQ, 608 } 609 610 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 611 TokenType.OR: exp.Or, 612 } 613 614 EQUALITY = { 615 TokenType.EQ: exp.EQ, 616 TokenType.NEQ: exp.NEQ, 617 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 618 } 619 620 COMPARISON = { 621 TokenType.GT: exp.GT, 622 TokenType.GTE: exp.GTE, 623 TokenType.LT: exp.LT, 624 TokenType.LTE: exp.LTE, 625 } 626 627 BITWISE = { 628 TokenType.AMP: exp.BitwiseAnd, 629 TokenType.CARET: exp.BitwiseXor, 630 TokenType.PIPE: exp.BitwiseOr, 631 } 632 633 TERM = { 634 TokenType.DASH: exp.Sub, 635 TokenType.PLUS: exp.Add, 636 TokenType.MOD: exp.Mod, 637 TokenType.COLLATE: exp.Collate, 638 } 639 640 FACTOR = { 641 TokenType.DIV: exp.IntDiv, 642 TokenType.LR_ARROW: exp.Distance, 643 TokenType.SLASH: exp.Div, 644 TokenType.STAR: exp.Mul, 645 } 646 647 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 648 649 TIMES = { 650 TokenType.TIME, 651 TokenType.TIMETZ, 652 } 653 654 TIMESTAMPS = { 655 TokenType.TIMESTAMP, 656 TokenType.TIMESTAMPTZ, 657 TokenType.TIMESTAMPLTZ, 658 *TIMES, 659 } 660 661 SET_OPERATIONS = { 662 TokenType.UNION, 663 TokenType.INTERSECT, 664 TokenType.EXCEPT, 665 } 666 667 JOIN_METHODS = { 668 TokenType.ASOF, 669 TokenType.NATURAL, 670 TokenType.POSITIONAL, 671 } 672 673 JOIN_SIDES = { 674 TokenType.LEFT, 675 TokenType.RIGHT, 676 TokenType.FULL, 677 } 678 679 JOIN_KINDS = { 680 TokenType.ANTI, 681 TokenType.CROSS, 682 TokenType.INNER, 683 TokenType.OUTER, 684 TokenType.SEMI, 685 TokenType.STRAIGHT_JOIN, 686 } 687 688 JOIN_HINTS: t.Set[str] = set() 689 690 LAMBDAS = { 691 TokenType.ARROW: lambda self, expressions: self.expression( 692 exp.Lambda, 693 this=self._replace_lambda( 694 self._parse_assignment(), 695 expressions, 696 ), 697 expressions=expressions, 698 ), 699 TokenType.FARROW: lambda self, expressions: self.expression( 700 exp.Kwarg, 701 this=exp.var(expressions[0].name), 702 expression=self._parse_assignment(), 703 ), 704 } 705 706 COLUMN_OPERATORS = { 707 TokenType.DOT: None, 708 TokenType.DCOLON: lambda self, this, to: self.expression( 709 exp.Cast if self.STRICT_CAST else exp.TryCast, 710 this=this, 711 to=to, 712 ), 713 TokenType.ARROW: lambda self, this, path: self.expression( 714 exp.JSONExtract, 715 this=this, 716 
expression=self.dialect.to_json_path(path), 717 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 718 ), 719 TokenType.DARROW: lambda self, this, path: self.expression( 720 exp.JSONExtractScalar, 721 this=this, 722 expression=self.dialect.to_json_path(path), 723 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 724 ), 725 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 726 exp.JSONBExtract, 727 this=this, 728 expression=path, 729 ), 730 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 731 exp.JSONBExtractScalar, 732 this=this, 733 expression=path, 734 ), 735 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 736 exp.JSONBContains, 737 this=this, 738 expression=key, 739 ), 740 } 741 742 EXPRESSION_PARSERS = { 743 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 744 exp.Column: lambda self: self._parse_column(), 745 exp.Condition: lambda self: self._parse_assignment(), 746 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 747 exp.Expression: lambda self: self._parse_expression(), 748 exp.From: lambda self: self._parse_from(joins=True), 749 exp.Group: lambda self: self._parse_group(), 750 exp.Having: lambda self: self._parse_having(), 751 exp.Identifier: lambda self: self._parse_id_var(), 752 exp.Join: lambda self: self._parse_join(), 753 exp.Lambda: lambda self: self._parse_lambda(), 754 exp.Lateral: lambda self: self._parse_lateral(), 755 exp.Limit: lambda self: self._parse_limit(), 756 exp.Offset: lambda self: self._parse_offset(), 757 exp.Order: lambda self: self._parse_order(), 758 exp.Ordered: lambda self: self._parse_ordered(), 759 exp.Properties: lambda self: self._parse_properties(), 760 exp.Qualify: lambda self: self._parse_qualify(), 761 exp.Returning: lambda self: self._parse_returning(), 762 exp.Select: lambda self: self._parse_select(), 763 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 764 exp.Table: lambda self: self._parse_table_parts(), 765 exp.TableAlias: lambda self: self._parse_table_alias(), 766 exp.When: lambda self: seq_get(self._parse_when_matched(), 0), 767 exp.Where: lambda self: self._parse_where(), 768 exp.Window: lambda self: self._parse_named_window(), 769 exp.With: lambda self: self._parse_with(), 770 "JOIN_TYPE": lambda self: self._parse_join_parts(), 771 } 772 773 STATEMENT_PARSERS = { 774 TokenType.ALTER: lambda self: self._parse_alter(), 775 TokenType.BEGIN: lambda self: self._parse_transaction(), 776 TokenType.CACHE: lambda self: self._parse_cache(), 777 TokenType.COMMENT: lambda self: self._parse_comment(), 778 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 779 TokenType.COPY: lambda self: self._parse_copy(), 780 TokenType.CREATE: lambda self: self._parse_create(), 781 TokenType.DELETE: lambda self: self._parse_delete(), 782 TokenType.DESC: lambda self: self._parse_describe(), 783 TokenType.DESCRIBE: lambda self: self._parse_describe(), 784 TokenType.DROP: lambda self: self._parse_drop(), 785 TokenType.GRANT: lambda self: self._parse_grant(), 786 TokenType.INSERT: lambda self: self._parse_insert(), 787 TokenType.KILL: lambda self: self._parse_kill(), 788 TokenType.LOAD: lambda self: self._parse_load(), 789 TokenType.MERGE: lambda self: self._parse_merge(), 790 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 791 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 792 TokenType.REFRESH: lambda self: self._parse_refresh(), 793 TokenType.ROLLBACK: lambda 
self: self._parse_commit_or_rollback(), 794 TokenType.SET: lambda self: self._parse_set(), 795 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 796 TokenType.UNCACHE: lambda self: self._parse_uncache(), 797 TokenType.UPDATE: lambda self: self._parse_update(), 798 TokenType.USE: lambda self: self.expression( 799 exp.Use, 800 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 801 this=self._parse_table(schema=False), 802 ), 803 TokenType.SEMICOLON: lambda self: self.expression(exp.Semicolon), 804 } 805 806 UNARY_PARSERS = { 807 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 808 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 809 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 810 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 811 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 812 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 813 } 814 815 STRING_PARSERS = { 816 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 817 exp.RawString, this=token.text 818 ), 819 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 820 exp.National, this=token.text 821 ), 822 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 823 TokenType.STRING: lambda self, token: self.expression( 824 exp.Literal, this=token.text, is_string=True 825 ), 826 TokenType.UNICODE_STRING: lambda self, token: self.expression( 827 exp.UnicodeString, 828 this=token.text, 829 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 830 ), 831 } 832 833 NUMERIC_PARSERS = { 834 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 835 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 836 TokenType.HEX_STRING: lambda self, token: self.expression(exp.HexString, this=token.text), 837 TokenType.NUMBER: lambda self, token: self.expression( 838 exp.Literal, this=token.text, is_string=False 839 ), 840 } 841 842 PRIMARY_PARSERS = { 843 **STRING_PARSERS, 844 **NUMERIC_PARSERS, 845 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 846 TokenType.NULL: lambda self, _: self.expression(exp.Null), 847 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 848 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 849 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 850 TokenType.STAR: lambda self, _: self._parse_star_ops(), 851 } 852 853 PLACEHOLDER_PARSERS = { 854 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 855 TokenType.PARAMETER: lambda self: self._parse_parameter(), 856 TokenType.COLON: lambda self: ( 857 self.expression(exp.Placeholder, this=self._prev.text) 858 if self._match_set(self.ID_VAR_TOKENS) 859 else None 860 ), 861 } 862 863 RANGE_PARSERS = { 864 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 865 TokenType.GLOB: binary_range_parser(exp.Glob), 866 TokenType.ILIKE: binary_range_parser(exp.ILike), 867 TokenType.IN: lambda self, this: self._parse_in(this), 868 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 869 TokenType.IS: lambda self, this: self._parse_is(this), 870 TokenType.LIKE: binary_range_parser(exp.Like), 871 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 872 TokenType.RLIKE: 
binary_range_parser(exp.RegexpLike), 873 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 874 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 875 } 876 877 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 878 "ALLOWED_VALUES": lambda self: self.expression( 879 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 880 ), 881 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 882 "AUTO": lambda self: self._parse_auto_property(), 883 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 884 "BACKUP": lambda self: self.expression( 885 exp.BackupProperty, this=self._parse_var(any_token=True) 886 ), 887 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 888 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 889 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 890 "CHECKSUM": lambda self: self._parse_checksum(), 891 "CLUSTER BY": lambda self: self._parse_cluster(), 892 "CLUSTERED": lambda self: self._parse_clustered_by(), 893 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 894 exp.CollateProperty, **kwargs 895 ), 896 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 897 "CONTAINS": lambda self: self._parse_contains_property(), 898 "COPY": lambda self: self._parse_copy_property(), 899 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 900 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 901 "DEFINER": lambda self: self._parse_definer(), 902 "DETERMINISTIC": lambda self: self.expression( 903 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 904 ), 905 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 906 "DUPLICATE": lambda self: self._parse_duplicate(), 907 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 908 "DISTKEY": lambda self: self._parse_distkey(), 909 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 910 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 911 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 912 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 913 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 914 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 915 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 916 "FREESPACE": lambda self: self._parse_freespace(), 917 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 918 "HEAP": lambda self: self.expression(exp.HeapProperty), 919 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 920 "IMMUTABLE": lambda self: self.expression( 921 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 922 ), 923 "INHERITS": lambda self: self.expression( 924 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 925 ), 926 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 927 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 928 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 929 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 930 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 931 "LIKE": lambda self: self._parse_create_like(), 932 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 933 "LOCK": 
lambda self: self._parse_locking(), 934 "LOCKING": lambda self: self._parse_locking(), 935 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 936 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 937 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 938 "MODIFIES": lambda self: self._parse_modifies_property(), 939 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 940 "NO": lambda self: self._parse_no_property(), 941 "ON": lambda self: self._parse_on_property(), 942 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 943 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 944 "PARTITION": lambda self: self._parse_partitioned_of(), 945 "PARTITION BY": lambda self: self._parse_partitioned_by(), 946 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 947 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 948 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 949 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 950 "READS": lambda self: self._parse_reads_property(), 951 "REMOTE": lambda self: self._parse_remote_with_connection(), 952 "RETURNS": lambda self: self._parse_returns(), 953 "STRICT": lambda self: self.expression(exp.StrictProperty), 954 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 955 "ROW": lambda self: self._parse_row(), 956 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 957 "SAMPLE": lambda self: self.expression( 958 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 959 ), 960 "SECURE": lambda self: self.expression(exp.SecureProperty), 961 "SECURITY": lambda self: self._parse_security(), 962 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 963 "SETTINGS": lambda self: self._parse_settings_property(), 964 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 965 "SORTKEY": lambda self: self._parse_sortkey(), 966 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 967 "STABLE": lambda self: self.expression( 968 exp.StabilityProperty, this=exp.Literal.string("STABLE") 969 ), 970 "STORED": lambda self: self._parse_stored(), 971 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 972 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 973 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 974 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 975 "TO": lambda self: self._parse_to_table(), 976 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 977 "TRANSFORM": lambda self: self.expression( 978 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 979 ), 980 "TTL": lambda self: self._parse_ttl(), 981 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 982 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 983 "VOLATILE": lambda self: self._parse_volatile_property(), 984 "WITH": lambda self: self._parse_with_property(), 985 } 986 987 CONSTRAINT_PARSERS = { 988 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 989 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 990 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 991 "CHARACTER SET": lambda self: self.expression( 992 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 993 ), 994 "CHECK": 
lambda self: self.expression( 995 exp.CheckColumnConstraint, 996 this=self._parse_wrapped(self._parse_assignment), 997 enforced=self._match_text_seq("ENFORCED"), 998 ), 999 "COLLATE": lambda self: self.expression( 1000 exp.CollateColumnConstraint, 1001 this=self._parse_identifier() or self._parse_column(), 1002 ), 1003 "COMMENT": lambda self: self.expression( 1004 exp.CommentColumnConstraint, this=self._parse_string() 1005 ), 1006 "COMPRESS": lambda self: self._parse_compress(), 1007 "CLUSTERED": lambda self: self.expression( 1008 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1009 ), 1010 "NONCLUSTERED": lambda self: self.expression( 1011 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1012 ), 1013 "DEFAULT": lambda self: self.expression( 1014 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1015 ), 1016 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1017 "EPHEMERAL": lambda self: self.expression( 1018 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1019 ), 1020 "EXCLUDE": lambda self: self.expression( 1021 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1022 ), 1023 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1024 "FORMAT": lambda self: self.expression( 1025 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1026 ), 1027 "GENERATED": lambda self: self._parse_generated_as_identity(), 1028 "IDENTITY": lambda self: self._parse_auto_increment(), 1029 "INLINE": lambda self: self._parse_inline(), 1030 "LIKE": lambda self: self._parse_create_like(), 1031 "NOT": lambda self: self._parse_not_constraint(), 1032 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1033 "ON": lambda self: ( 1034 self._match(TokenType.UPDATE) 1035 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1036 ) 1037 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1038 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1039 "PERIOD": lambda self: self._parse_period_for_system_time(), 1040 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1041 "REFERENCES": lambda self: self._parse_references(match=False), 1042 "TITLE": lambda self: self.expression( 1043 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1044 ), 1045 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1046 "UNIQUE": lambda self: self._parse_unique(), 1047 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1048 "WITH": lambda self: self.expression( 1049 exp.Properties, expressions=self._parse_wrapped_properties() 1050 ), 1051 } 1052 1053 ALTER_PARSERS = { 1054 "ADD": lambda self: self._parse_alter_table_add(), 1055 "ALTER": lambda self: self._parse_alter_table_alter(), 1056 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1057 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1058 "DROP": lambda self: self._parse_alter_table_drop(), 1059 "RENAME": lambda self: self._parse_alter_table_rename(), 1060 "SET": lambda self: self._parse_alter_table_set(), 1061 "AS": lambda self: self._parse_select(), 1062 } 1063 1064 ALTER_ALTER_PARSERS = { 1065 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1066 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1067 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1068 "COMPOUND": lambda self: 
self._parse_alter_sortkey(compound=True), 1069 } 1070 1071 SCHEMA_UNNAMED_CONSTRAINTS = { 1072 "CHECK", 1073 "EXCLUDE", 1074 "FOREIGN KEY", 1075 "LIKE", 1076 "PERIOD", 1077 "PRIMARY KEY", 1078 "UNIQUE", 1079 } 1080 1081 NO_PAREN_FUNCTION_PARSERS = { 1082 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1083 "CASE": lambda self: self._parse_case(), 1084 "CONNECT_BY_ROOT": lambda self: self.expression( 1085 exp.ConnectByRoot, this=self._parse_column() 1086 ), 1087 "IF": lambda self: self._parse_if(), 1088 "NEXT": lambda self: self._parse_next_value_for(), 1089 } 1090 1091 INVALID_FUNC_NAME_TOKENS = { 1092 TokenType.IDENTIFIER, 1093 TokenType.STRING, 1094 } 1095 1096 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1097 1098 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1099 1100 FUNCTION_PARSERS = { 1101 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1102 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1103 "DECODE": lambda self: self._parse_decode(), 1104 "EXTRACT": lambda self: self._parse_extract(), 1105 "GAP_FILL": lambda self: self._parse_gap_fill(), 1106 "JSON_OBJECT": lambda self: self._parse_json_object(), 1107 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1108 "JSON_TABLE": lambda self: self._parse_json_table(), 1109 "MATCH": lambda self: self._parse_match_against(), 1110 "NORMALIZE": lambda self: self._parse_normalize(), 1111 "OPENJSON": lambda self: self._parse_open_json(), 1112 "OVERLAY": lambda self: self._parse_overlay(), 1113 "POSITION": lambda self: self._parse_position(), 1114 "PREDICT": lambda self: self._parse_predict(), 1115 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1116 "STRING_AGG": lambda self: self._parse_string_agg(), 1117 "SUBSTRING": lambda self: self._parse_substring(), 1118 "TRIM": lambda self: self._parse_trim(), 1119 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1120 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1121 } 1122 1123 QUERY_MODIFIER_PARSERS = { 1124 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1125 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1126 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1127 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1128 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1129 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1130 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1131 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1132 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1133 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1134 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1135 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1136 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1137 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1138 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1139 TokenType.CLUSTER_BY: lambda self: ( 1140 "cluster", 1141 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1142 ), 1143 TokenType.DISTRIBUTE_BY: lambda self: ( 1144 "distribute", 1145 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1146 ), 1147 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1148 
TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1149 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1150 } 1151 1152 SET_PARSERS = { 1153 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1154 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1155 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1156 "TRANSACTION": lambda self: self._parse_set_transaction(), 1157 } 1158 1159 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1160 1161 TYPE_LITERAL_PARSERS = { 1162 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1163 } 1164 1165 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1166 1167 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1168 1169 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1170 1171 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1172 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1173 "ISOLATION": ( 1174 ("LEVEL", "REPEATABLE", "READ"), 1175 ("LEVEL", "READ", "COMMITTED"), 1176 ("LEVEL", "READ", "UNCOMMITTED"), 1177 ("LEVEL", "SERIALIZABLE"), 1178 ), 1179 "READ": ("WRITE", "ONLY"), 1180 } 1181 1182 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1183 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1184 ) 1185 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1186 1187 CREATE_SEQUENCE: OPTIONS_TYPE = { 1188 "SCALE": ("EXTEND", "NOEXTEND"), 1189 "SHARD": ("EXTEND", "NOEXTEND"), 1190 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1191 **dict.fromkeys( 1192 ( 1193 "SESSION", 1194 "GLOBAL", 1195 "KEEP", 1196 "NOKEEP", 1197 "ORDER", 1198 "NOORDER", 1199 "NOCACHE", 1200 "CYCLE", 1201 "NOCYCLE", 1202 "NOMINVALUE", 1203 "NOMAXVALUE", 1204 "NOSCALE", 1205 "NOSHARD", 1206 ), 1207 tuple(), 1208 ), 1209 } 1210 1211 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1212 1213 USABLES: OPTIONS_TYPE = dict.fromkeys( 1214 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1215 ) 1216 1217 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1218 1219 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1220 "TYPE": ("EVOLUTION",), 1221 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1222 } 1223 1224 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1225 "NOT": ("ENFORCED",), 1226 "MATCH": ( 1227 "FULL", 1228 "PARTIAL", 1229 "SIMPLE", 1230 ), 1231 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1232 **dict.fromkeys(("DEFERRABLE", "NORELY"), tuple()), 1233 } 1234 1235 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1236 1237 CLONE_KEYWORDS = {"CLONE", "COPY"} 1238 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1239 HISTORICAL_DATA_KIND = {"TIMESTAMP", "OFFSET", "STATEMENT", "STREAM"} 1240 1241 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1242 1243 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1244 1245 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1246 1247 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1248 1249 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1250 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1251 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1252 1253 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1254 1255 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1256 1257
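# A hedged sketch of how the OPTIONS_TYPE tables above (for example
# TRANSACTION_CHARACTERISTICS) are consumed by _parse_var_from_options: the
# dict key is the leading keyword, each continuation tuple greedily extends
# the match, and the multi-word option collapses into a single exp.Var.
import sqlglot
from sqlglot import exp

stmt = sqlglot.parse_one("SET TRANSACTION ISOLATION LEVEL READ COMMITTED")
item = stmt.find(exp.SetItem)
assert item is not None and item.text("kind") == "TRANSACTION"
assert item.find(exp.Var).name == "ISOLATION LEVEL READ COMMITTED"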
ADD_CONSTRAINT_TOKENS = { 1258 TokenType.CONSTRAINT, 1259 TokenType.FOREIGN_KEY, 1260 TokenType.INDEX, 1261 TokenType.KEY, 1262 TokenType.PRIMARY_KEY, 1263 TokenType.UNIQUE, 1264 } 1265 1266 DISTINCT_TOKENS = {TokenType.DISTINCT} 1267 1268 NULL_TOKENS = {TokenType.NULL} 1269 1270 UNNEST_OFFSET_ALIAS_TOKENS = ID_VAR_TOKENS - SET_OPERATIONS 1271 1272 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1273 1274 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1275 1276 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1277 1278 ODBC_DATETIME_LITERALS = { 1279 "d": exp.Date, 1280 "t": exp.Time, 1281 "ts": exp.Timestamp, 1282 } 1283 1284 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1285 1286 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1287 1288 STRICT_CAST = True 1289 1290 PREFIXED_PIVOT_COLUMNS = False 1291 IDENTIFY_PIVOT_STRINGS = False 1292 1293 LOG_DEFAULTS_TO_LN = False 1294 1295 # Whether ADD is present for each column added by ALTER TABLE 1296 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = True 1297 1298 # Whether the table sample clause expects CSV syntax 1299 TABLESAMPLE_CSV = False 1300 1301 # The default method used for table sampling 1302 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1303 1304 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1305 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1306 1307 # Whether the TRIM function expects the characters to trim as its first argument 1308 TRIM_PATTERN_FIRST = False 1309 1310 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1311 STRING_ALIASES = False 1312 1313 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1314 MODIFIERS_ATTACHED_TO_SET_OP = True 1315 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1316 1317 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1318 NO_PAREN_IF_COMMANDS = True 1319 1320 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1321 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1322 1323 # Whether the `:` operator is used to extract a value from a VARIANT column 1324 COLON_IS_VARIANT_EXTRACT = False 1325 1326 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1327 # If this is True and '(' is not found, the keyword will be treated as an identifier 1328 VALUES_FOLLOWED_BY_PAREN = True 1329 1330 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1331 SUPPORTS_IMPLICIT_UNNEST = False 1332 1333 # Whether interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1334 INTERVAL_SPANS = True 1335 1336 # Whether a PARTITION clause can follow a table reference 1337 SUPPORTS_PARTITION_SELECTION = False 1338 1339 __slots__ = ( 1340 "error_level", 1341 "error_message_context", 1342 "max_errors", 1343 "dialect", 1344 "sql", 1345 "errors", 1346 "_tokens", 1347 "_index", 1348 "_curr", 1349 "_next", 1350 "_prev", 1351 "_prev_comments", 1352 ) 1353 1354 # Autofilled 1355 SHOW_TRIE: t.Dict = {} 1356 SET_TRIE: t.Dict = {} 1357 1358 def __init__( 1359 self, 1360 error_level: t.Optional[ErrorLevel] = None, 1361 error_message_context: int = 100, 1362 max_errors: int = 3, 1363 dialect: DialectType = None, 1364 ): 1365 from sqlglot.dialects import Dialect 1366 1367 self.error_level = error_level or ErrorLevel.IMMEDIATE 1368 self.error_message_context = error_message_context 1369 self.max_errors = max_errors 1370 self.dialect = Dialect.get_or_raise(dialect) 1371 self.reset() 1372 1373 def reset(self): 1374 self.sql = "" 1375 self.errors = [] 1376 self._tokens = [] 1377 self._index = 0 1378 self._curr = None 1379 self._next = None 1380 self._prev = None 1381 self._prev_comments = None 1382 1383 def parse( 1384 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1385 ) -> t.List[t.Optional[exp.Expression]]: 1386 """ 1387 Parses a list of tokens and returns a list of syntax trees, one tree 1388 per parsed SQL statement. 1389 1390 Args: 1391 raw_tokens: The list of tokens. 1392 sql: The original SQL string, used to produce helpful debug messages. 1393 1394 Returns: 1395 The list of the produced syntax trees. 1396 """ 1397 return self._parse( 1398 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1399 ) 1400 1401 def parse_into( 1402 self, 1403 expression_types: exp.IntoType, 1404 raw_tokens: t.List[Token], 1405 sql: t.Optional[str] = None, 1406 ) -> t.List[t.Optional[exp.Expression]]: 1407 """ 1408 Parses a list of tokens into a given Expression type. If a collection of Expression 1409 types is given instead, this method will try to parse the token list into each one 1410 of them, stopping at the first for which the parsing succeeds. 1411 1412 Args: 1413 expression_types: The expression type(s) to try and parse the token list into. 1414 raw_tokens: The list of tokens. 1415 sql: The original SQL string, used to produce helpful debug messages. 1416 1417 Returns: 1418 The target Expression.
1419 """ 1420 errors = [] 1421 for expression_type in ensure_list(expression_types): 1422 parser = self.EXPRESSION_PARSERS.get(expression_type) 1423 if not parser: 1424 raise TypeError(f"No parser registered for {expression_type}") 1425 1426 try: 1427 return self._parse(parser, raw_tokens, sql) 1428 except ParseError as e: 1429 e.errors[0]["into_expression"] = expression_type 1430 errors.append(e) 1431 1432 raise ParseError( 1433 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1434 errors=merge_errors(errors), 1435 ) from errors[-1] 1436 1437 def _parse( 1438 self, 1439 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1440 raw_tokens: t.List[Token], 1441 sql: t.Optional[str] = None, 1442 ) -> t.List[t.Optional[exp.Expression]]: 1443 self.reset() 1444 self.sql = sql or "" 1445 1446 total = len(raw_tokens) 1447 chunks: t.List[t.List[Token]] = [[]] 1448 1449 for i, token in enumerate(raw_tokens): 1450 if token.token_type == TokenType.SEMICOLON: 1451 if token.comments: 1452 chunks.append([token]) 1453 1454 if i < total - 1: 1455 chunks.append([]) 1456 else: 1457 chunks[-1].append(token) 1458 1459 expressions = [] 1460 1461 for tokens in chunks: 1462 self._index = -1 1463 self._tokens = tokens 1464 self._advance() 1465 1466 expressions.append(parse_method(self)) 1467 1468 if self._index < len(self._tokens): 1469 self.raise_error("Invalid expression / Unexpected token") 1470 1471 self.check_errors() 1472 1473 return expressions 1474 1475 def check_errors(self) -> None: 1476 """Logs or raises any found errors, depending on the chosen error level setting.""" 1477 if self.error_level == ErrorLevel.WARN: 1478 for error in self.errors: 1479 logger.error(str(error)) 1480 elif self.error_level == ErrorLevel.RAISE and self.errors: 1481 raise ParseError( 1482 concat_messages(self.errors, self.max_errors), 1483 errors=merge_errors(self.errors), 1484 ) 1485 1486 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1487 """ 1488 Appends an error in the list of recorded errors or raises it, depending on the chosen 1489 error level setting. 1490 """ 1491 token = token or self._curr or self._prev or Token.string("") 1492 start = token.start 1493 end = token.end + 1 1494 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1495 highlight = self.sql[start:end] 1496 end_context = self.sql[end : end + self.error_message_context] 1497 1498 error = ParseError.new( 1499 f"{message}. Line {token.line}, Col: {token.col}.\n" 1500 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1501 description=message, 1502 line=token.line, 1503 col=token.col, 1504 start_context=start_context, 1505 highlight=highlight, 1506 end_context=end_context, 1507 ) 1508 1509 if self.error_level == ErrorLevel.IMMEDIATE: 1510 raise error 1511 1512 self.errors.append(error) 1513 1514 def expression( 1515 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1516 ) -> E: 1517 """ 1518 Creates a new, validated Expression. 1519 1520 Args: 1521 exp_class: The expression class to instantiate. 1522 comments: An optional list of comments to attach to the expression. 1523 kwargs: The arguments to set for the expression along with their respective values. 1524 1525 Returns: 1526 The target expression. 
1527 """ 1528 instance = exp_class(**kwargs) 1529 instance.add_comments(comments) if comments else self._add_comments(instance) 1530 return self.validate_expression(instance) 1531 1532 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1533 if expression and self._prev_comments: 1534 expression.add_comments(self._prev_comments) 1535 self._prev_comments = None 1536 1537 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1538 """ 1539 Validates an Expression, making sure that all its mandatory arguments are set. 1540 1541 Args: 1542 expression: The expression to validate. 1543 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1544 1545 Returns: 1546 The validated expression. 1547 """ 1548 if self.error_level != ErrorLevel.IGNORE: 1549 for error_message in expression.error_messages(args): 1550 self.raise_error(error_message) 1551 1552 return expression 1553 1554 def _find_sql(self, start: Token, end: Token) -> str: 1555 return self.sql[start.start : end.end + 1] 1556 1557 def _is_connected(self) -> bool: 1558 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1559 1560 def _advance(self, times: int = 1) -> None: 1561 self._index += times 1562 self._curr = seq_get(self._tokens, self._index) 1563 self._next = seq_get(self._tokens, self._index + 1) 1564 1565 if self._index > 0: 1566 self._prev = self._tokens[self._index - 1] 1567 self._prev_comments = self._prev.comments 1568 else: 1569 self._prev = None 1570 self._prev_comments = None 1571 1572 def _retreat(self, index: int) -> None: 1573 if index != self._index: 1574 self._advance(index - self._index) 1575 1576 def _warn_unsupported(self) -> None: 1577 if len(self._tokens) <= 1: 1578 return 1579 1580 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1581 # interested in emitting a warning for the one being currently processed. 1582 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1583 1584 logger.warning( 1585 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1586 ) 1587 1588 def _parse_command(self) -> exp.Command: 1589 self._warn_unsupported() 1590 return self.expression( 1591 exp.Command, 1592 comments=self._prev_comments, 1593 this=self._prev.text.upper(), 1594 expression=self._parse_string(), 1595 ) 1596 1597 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1598 """ 1599 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1600 1601 solve this by setting & resetting the parser state accordingly 1602 """ 1603 index = self._index 1604 error_level = self.error_level 1605 1606 self.error_level = ErrorLevel.IMMEDIATE 1607 try: 1608 this = parse_method() 1609 except ParseError: 1610 this = None 1611 finally: 1612 if not this or retreat: 1613 self._retreat(index) 1614 self.error_level = error_level 1615 1616 return this 1617 1618 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1619 start = self._prev 1620 exists = self._parse_exists() if allow_exists else None 1621 1622 self._match(TokenType.ON) 1623 1624 materialized = self._match_text_seq("MATERIALIZED") 1625 kind = self._match_set(self.CREATABLES) and self._prev 1626 if not kind: 1627 return self._parse_as_command(start) 1628 1629 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1630 this = self._parse_user_defined_function(kind=kind.token_type) 1631 elif kind.token_type == TokenType.TABLE: 1632 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1633 elif kind.token_type == TokenType.COLUMN: 1634 this = self._parse_column() 1635 else: 1636 this = self._parse_id_var() 1637 1638 self._match(TokenType.IS) 1639 1640 return self.expression( 1641 exp.Comment, 1642 this=this, 1643 kind=kind.text, 1644 expression=self._parse_string(), 1645 exists=exists, 1646 materialized=materialized, 1647 ) 1648 1649 def _parse_to_table( 1650 self, 1651 ) -> exp.ToTableProperty: 1652 table = self._parse_table_parts(schema=True) 1653 return self.expression(exp.ToTableProperty, this=table) 1654 1655 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1656 def _parse_ttl(self) -> exp.Expression: 1657 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1658 this = self._parse_bitwise() 1659 1660 if self._match_text_seq("DELETE"): 1661 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1662 if self._match_text_seq("RECOMPRESS"): 1663 return self.expression( 1664 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1665 ) 1666 if self._match_text_seq("TO", "DISK"): 1667 return self.expression( 1668 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1669 ) 1670 if self._match_text_seq("TO", "VOLUME"): 1671 return self.expression( 1672 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1673 ) 1674 1675 return this 1676 1677 expressions = self._parse_csv(_parse_ttl_action) 1678 where = self._parse_where() 1679 group = self._parse_group() 1680 1681 aggregates = None 1682 if group and self._match(TokenType.SET): 1683 aggregates = self._parse_csv(self._parse_set_item) 1684 1685 return self.expression( 1686 exp.MergeTreeTTL, 1687 expressions=expressions, 1688 where=where, 1689 group=group, 1690 aggregates=aggregates, 1691 ) 1692 1693 def _parse_statement(self) -> t.Optional[exp.Expression]: 1694 if self._curr is None: 1695 return None 1696 1697 if self._match_set(self.STATEMENT_PARSERS): 1698 return self.STATEMENT_PARSERS[self._prev.token_type](self) 1699 1700 if self._match_set(self.dialect.tokenizer.COMMANDS): 1701 return self._parse_command() 1702 1703 expression = self._parse_expression() 1704 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1705 return self._parse_query_modifiers(expression) 1706
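# To summarize the dispatch in _parse_statement above: statement-level tokens are routed
# through STATEMENT_PARSERS, registered commands fall back to _parse_command, and anything
# else is parsed as a plain expression or SELECT. A doctest-style sketch via the public
# API (illustrative, not part of this module):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT 1").key
#     'select'
#     >>> sqlglot.parse_one("DROP TABLE t").key
#     'drop'
1707 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1708 start =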
self._prev 1709 temporary = self._match(TokenType.TEMPORARY) 1710 materialized = self._match_text_seq("MATERIALIZED") 1711 1712 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1713 if not kind: 1714 return self._parse_as_command(start) 1715 1716 concurrently = self._match_text_seq("CONCURRENTLY") 1717 if_exists = exists or self._parse_exists() 1718 table = self._parse_table_parts( 1719 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1720 ) 1721 1722 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1723 1724 if self._match(TokenType.L_PAREN, advance=False): 1725 expressions = self._parse_wrapped_csv(self._parse_types) 1726 else: 1727 expressions = None 1728 1729 return self.expression( 1730 exp.Drop, 1731 comments=start.comments, 1732 exists=if_exists, 1733 this=table, 1734 expressions=expressions, 1735 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1736 temporary=temporary, 1737 materialized=materialized, 1738 cascade=self._match_text_seq("CASCADE"), 1739 constraints=self._match_text_seq("CONSTRAINTS"), 1740 purge=self._match_text_seq("PURGE"), 1741 cluster=cluster, 1742 concurrently=concurrently, 1743 ) 1744 1745 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1746 return ( 1747 self._match_text_seq("IF") 1748 and (not not_ or self._match(TokenType.NOT)) 1749 and self._match(TokenType.EXISTS) 1750 ) 1751 1752 def _parse_create(self) -> exp.Create | exp.Command: 1753 # Note: this can't be None because we've matched a statement parser 1754 start = self._prev 1755 comments = self._prev_comments 1756 1757 replace = ( 1758 start.token_type == TokenType.REPLACE 1759 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1760 or self._match_pair(TokenType.OR, TokenType.ALTER) 1761 ) 1762 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1763 1764 unique = self._match(TokenType.UNIQUE) 1765 1766 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1767 clustered = True 1768 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1769 "COLUMNSTORE" 1770 ): 1771 clustered = False 1772 else: 1773 clustered = None 1774 1775 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1776 self._advance() 1777 1778 properties = None 1779 create_token = self._match_set(self.CREATABLES) and self._prev 1780 1781 if not create_token: 1782 # exp.Properties.Location.POST_CREATE 1783 properties = self._parse_properties() 1784 create_token = self._match_set(self.CREATABLES) and self._prev 1785 1786 if not properties or not create_token: 1787 return self._parse_as_command(start) 1788 1789 concurrently = self._match_text_seq("CONCURRENTLY") 1790 exists = self._parse_exists(not_=True) 1791 this = None 1792 expression: t.Optional[exp.Expression] = None 1793 indexes = None 1794 no_schema_binding = None 1795 begin = None 1796 end = None 1797 clone = None 1798 1799 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 1800 nonlocal properties 1801 if properties and temp_props: 1802 properties.expressions.extend(temp_props.expressions) 1803 elif temp_props: 1804 properties = temp_props 1805 1806 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1807 this = self._parse_user_defined_function(kind=create_token.token_type) 1808 1809 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 1810 extend_props(self._parse_properties()) 1811 1812 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 1813 
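# Properties are also allowed to follow the AS clause of a UDF definition, hence the
# second _parse_properties pass below.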
extend_props(self._parse_properties()) 1814 1815 if not expression: 1816 if self._match(TokenType.COMMAND): 1817 expression = self._parse_as_command(self._prev) 1818 else: 1819 begin = self._match(TokenType.BEGIN) 1820 return_ = self._match_text_seq("RETURN") 1821 1822 if self._match(TokenType.STRING, advance=False): 1823 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 1824 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 1825 expression = self._parse_string() 1826 extend_props(self._parse_properties()) 1827 else: 1828 expression = self._parse_statement() 1829 1830 end = self._match_text_seq("END") 1831 1832 if return_: 1833 expression = self.expression(exp.Return, this=expression) 1834 elif create_token.token_type == TokenType.INDEX: 1835 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 1836 if not self._match(TokenType.ON): 1837 index = self._parse_id_var() 1838 anonymous = False 1839 else: 1840 index = None 1841 anonymous = True 1842 1843 this = self._parse_index(index=index, anonymous=anonymous) 1844 elif create_token.token_type in self.DB_CREATABLES: 1845 table_parts = self._parse_table_parts( 1846 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 1847 ) 1848 1849 # exp.Properties.Location.POST_NAME 1850 self._match(TokenType.COMMA) 1851 extend_props(self._parse_properties(before=True)) 1852 1853 this = self._parse_schema(this=table_parts) 1854 1855 # exp.Properties.Location.POST_SCHEMA and POST_WITH 1856 extend_props(self._parse_properties()) 1857 1858 self._match(TokenType.ALIAS) 1859 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 1860 # exp.Properties.Location.POST_ALIAS 1861 extend_props(self._parse_properties()) 1862 1863 if create_token.token_type == TokenType.SEQUENCE: 1864 expression = self._parse_types() 1865 extend_props(self._parse_properties()) 1866 else: 1867 expression = self._parse_ddl_select() 1868 1869 if create_token.token_type == TokenType.TABLE: 1870 # exp.Properties.Location.POST_EXPRESSION 1871 extend_props(self._parse_properties()) 1872 1873 indexes = [] 1874 while True: 1875 index = self._parse_index() 1876 1877 # exp.Properties.Location.POST_INDEX 1878 extend_props(self._parse_properties()) 1879 if not index: 1880 break 1881 else: 1882 self._match(TokenType.COMMA) 1883 indexes.append(index) 1884 elif create_token.token_type == TokenType.VIEW: 1885 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 1886 no_schema_binding = True 1887 1888 shallow = self._match_text_seq("SHALLOW") 1889 1890 if self._match_texts(self.CLONE_KEYWORDS): 1891 copy = self._prev.text.lower() == "copy" 1892 clone = self.expression( 1893 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 1894 ) 1895 1896 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 1897 return self._parse_as_command(start) 1898 1899 create_kind_text = create_token.text.upper() 1900 return self.expression( 1901 exp.Create, 1902 comments=comments, 1903 this=this, 1904 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 1905 replace=replace, 1906 refresh=refresh, 1907 unique=unique, 1908 expression=expression, 1909 exists=exists, 1910 properties=properties, 1911 indexes=indexes, 1912 no_schema_binding=no_schema_binding, 1913 begin=begin, 1914 end=end, 1915 clone=clone, 1916 concurrently=concurrently, 1917 clustered=clustered, 1918 ) 1919
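# A doctest-style round trip through the CREATE parser above (illustrative, not part of
# this module):
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("CREATE TABLE t (c INT)").sql()
#     'CREATE TABLE t (c INT)'
1920 def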
_parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 1921 seq = exp.SequenceProperties() 1922 1923 options = [] 1924 index = self._index 1925 1926 while self._curr: 1927 self._match(TokenType.COMMA) 1928 if self._match_text_seq("INCREMENT"): 1929 self._match_text_seq("BY") 1930 self._match_text_seq("=") 1931 seq.set("increment", self._parse_term()) 1932 elif self._match_text_seq("MINVALUE"): 1933 seq.set("minvalue", self._parse_term()) 1934 elif self._match_text_seq("MAXVALUE"): 1935 seq.set("maxvalue", self._parse_term()) 1936 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 1937 self._match_text_seq("=") 1938 seq.set("start", self._parse_term()) 1939 elif self._match_text_seq("CACHE"): 1940 # T-SQL allows empty CACHE which is initialized dynamically 1941 seq.set("cache", self._parse_number() or True) 1942 elif self._match_text_seq("OWNED", "BY"): 1943 # "OWNED BY NONE" is the default 1944 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 1945 else: 1946 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 1947 if opt: 1948 options.append(opt) 1949 else: 1950 break 1951 1952 seq.set("options", options if options else None) 1953 return None if self._index == index else seq 1954 1955 def _parse_property_before(self) -> t.Optional[exp.Expression]: 1956 # only used for teradata currently 1957 self._match(TokenType.COMMA) 1958 1959 kwargs = { 1960 "no": self._match_text_seq("NO"), 1961 "dual": self._match_text_seq("DUAL"), 1962 "before": self._match_text_seq("BEFORE"), 1963 "default": self._match_text_seq("DEFAULT"), 1964 "local": (self._match_text_seq("LOCAL") and "LOCAL") 1965 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 1966 "after": self._match_text_seq("AFTER"), 1967 "minimum": self._match_texts(("MIN", "MINIMUM")), 1968 "maximum": self._match_texts(("MAX", "MAXIMUM")), 1969 } 1970 1971 if self._match_texts(self.PROPERTY_PARSERS): 1972 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 1973 try: 1974 return parser(self, **{k: v for k, v in kwargs.items() if v}) 1975 except TypeError: 1976 self.raise_error(f"Cannot parse property '{self._prev.text}'") 1977 1978 return None 1979 1980 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 1981 return self._parse_wrapped_csv(self._parse_property) 1982 1983 def _parse_property(self) -> t.Optional[exp.Expression]: 1984 if self._match_texts(self.PROPERTY_PARSERS): 1985 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 1986 1987 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 1988 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 1989 1990 if self._match_text_seq("COMPOUND", "SORTKEY"): 1991 return self._parse_sortkey(compound=True) 1992 1993 if self._match_text_seq("SQL", "SECURITY"): 1994 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 1995 1996 index = self._index 1997 key = self._parse_column() 1998 1999 if not self._match(TokenType.EQ): 2000 self._retreat(index) 2001 return self._parse_sequence_properties() 2002 2003 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2004 if isinstance(key, exp.Column): 2005 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2006 2007 value = self._parse_bitwise() or self._parse_var(any_token=True) 2008 2009 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2010 if 
isinstance(value, exp.Column): 2011 value = exp.var(value.name) 2012 2013 return self.expression(exp.Property, this=key, value=value) 2014 2015 def _parse_stored(self) -> exp.FileFormatProperty: 2016 self._match(TokenType.ALIAS) 2017 2018 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2019 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2020 2021 return self.expression( 2022 exp.FileFormatProperty, 2023 this=( 2024 self.expression( 2025 exp.InputOutputFormat, input_format=input_format, output_format=output_format 2026 ) 2027 if input_format or output_format 2028 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2029 ), 2030 ) 2031 2032 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2033 field = self._parse_field() 2034 if isinstance(field, exp.Identifier) and not field.quoted: 2035 field = exp.var(field) 2036 2037 return field 2038 2039 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2040 self._match(TokenType.EQ) 2041 self._match(TokenType.ALIAS) 2042 2043 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2044 2045 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2046 properties = [] 2047 while True: 2048 if before: 2049 prop = self._parse_property_before() 2050 else: 2051 prop = self._parse_property() 2052 if not prop: 2053 break 2054 for p in ensure_list(prop): 2055 properties.append(p) 2056 2057 if properties: 2058 return self.expression(exp.Properties, expressions=properties) 2059 2060 return None 2061 2062 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2063 return self.expression( 2064 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2065 ) 2066 2067 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2068 if self._match_texts(("DEFINER", "INVOKER")): 2069 security_specifier = self._prev.text.upper() 2070 return self.expression(exp.SecurityProperty, this=security_specifier) 2071 return None 2072 2073 def _parse_settings_property(self) -> exp.SettingsProperty: 2074 return self.expression( 2075 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2076 ) 2077 2078 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2079 if self._index >= 2: 2080 pre_volatile_token = self._tokens[self._index - 2] 2081 else: 2082 pre_volatile_token = None 2083 2084 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2085 return exp.VolatileProperty() 2086 2087 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2088 2089 def _parse_retention_period(self) -> exp.Var: 2090 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2091 number = self._parse_number() 2092 number_str = f"{number} " if number else "" 2093 unit = self._parse_var(any_token=True) 2094 return exp.var(f"{number_str}{unit}") 2095 2096 def _parse_system_versioning_property( 2097 self, with_: bool = False 2098 ) -> exp.WithSystemVersioningProperty: 2099 self._match(TokenType.EQ) 2100 prop = self.expression( 2101 exp.WithSystemVersioningProperty, 2102 **{ # type: ignore 2103 "on": True, 2104 "with": with_, 2105 }, 2106 ) 2107 2108 if self._match_text_seq("OFF"): 2109 prop.set("on", False) 2110 return prop 2111 2112 self._match(TokenType.ON) 2113 if self._match(TokenType.L_PAREN): 2114 while self._curr and not 
self._match(TokenType.R_PAREN): 2115 if self._match_text_seq("HISTORY_TABLE", "="): 2116 prop.set("this", self._parse_table_parts()) 2117 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2118 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2119 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2120 prop.set("retention_period", self._parse_retention_period()) 2121 2122 self._match(TokenType.COMMA) 2123 2124 return prop 2125 2126 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2127 self._match(TokenType.EQ) 2128 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2129 prop = self.expression(exp.DataDeletionProperty, on=on) 2130 2131 if self._match(TokenType.L_PAREN): 2132 while self._curr and not self._match(TokenType.R_PAREN): 2133 if self._match_text_seq("FILTER_COLUMN", "="): 2134 prop.set("filter_column", self._parse_column()) 2135 elif self._match_text_seq("RETENTION_PERIOD", "="): 2136 prop.set("retention_period", self._parse_retention_period()) 2137 2138 self._match(TokenType.COMMA) 2139 2140 return prop 2141 2142 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2143 kind = "HASH" 2144 expressions: t.Optional[t.List[exp.Expression]] = None 2145 if self._match_text_seq("BY", "HASH"): 2146 expressions = self._parse_wrapped_csv(self._parse_id_var) 2147 elif self._match_text_seq("BY", "RANDOM"): 2148 kind = "RANDOM" 2149 2150 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2151 buckets: t.Optional[exp.Expression] = None 2152 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2153 buckets = self._parse_number() 2154 2155 return self.expression( 2156 exp.DistributedByProperty, 2157 expressions=expressions, 2158 kind=kind, 2159 buckets=buckets, 2160 order=self._parse_order(), 2161 ) 2162 2163 def _parse_duplicate(self) -> exp.DuplicateKeyProperty: 2164 self._match_text_seq("KEY") 2165 expressions = self._parse_wrapped_csv(self._parse_id_var, optional=False) 2166 return self.expression(exp.DuplicateKeyProperty, expressions=expressions) 2167 2168 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2169 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2170 prop = self._parse_system_versioning_property(with_=True) 2171 self._match_r_paren() 2172 return prop 2173 2174 if self._match(TokenType.L_PAREN, advance=False): 2175 return self._parse_wrapped_properties() 2176 2177 if self._match_text_seq("JOURNAL"): 2178 return self._parse_withjournaltable() 2179 2180 if self._match_texts(self.VIEW_ATTRIBUTES): 2181 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2182 2183 if self._match_text_seq("DATA"): 2184 return self._parse_withdata(no=False) 2185 elif self._match_text_seq("NO", "DATA"): 2186 return self._parse_withdata(no=True) 2187 2188 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2189 return self._parse_serde_properties(with_=True) 2190 2191 if self._match(TokenType.SCHEMA): 2192 return self.expression( 2193 exp.WithSchemaBindingProperty, 2194 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2195 ) 2196 2197 if not self._next: 2198 return None 2199 2200 return self._parse_withisolatedloading() 2201 2202 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2203 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2204 self._match(TokenType.EQ) 2205 2206 user = self._parse_id_var() 2207 self._match(TokenType.PARAMETER) 2208 host = 
self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2209 2210 if not user or not host: 2211 return None 2212 2213 return exp.DefinerProperty(this=f"{user}@{host}") 2214 2215 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2216 self._match(TokenType.TABLE) 2217 self._match(TokenType.EQ) 2218 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2219 2220 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2221 return self.expression(exp.LogProperty, no=no) 2222 2223 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2224 return self.expression(exp.JournalProperty, **kwargs) 2225 2226 def _parse_checksum(self) -> exp.ChecksumProperty: 2227 self._match(TokenType.EQ) 2228 2229 on = None 2230 if self._match(TokenType.ON): 2231 on = True 2232 elif self._match_text_seq("OFF"): 2233 on = False 2234 2235 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2236 2237 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2238 return self.expression( 2239 exp.Cluster, 2240 expressions=( 2241 self._parse_wrapped_csv(self._parse_ordered) 2242 if wrapped 2243 else self._parse_csv(self._parse_ordered) 2244 ), 2245 ) 2246 2247 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2248 self._match_text_seq("BY") 2249 2250 self._match_l_paren() 2251 expressions = self._parse_csv(self._parse_column) 2252 self._match_r_paren() 2253 2254 if self._match_text_seq("SORTED", "BY"): 2255 self._match_l_paren() 2256 sorted_by = self._parse_csv(self._parse_ordered) 2257 self._match_r_paren() 2258 else: 2259 sorted_by = None 2260 2261 self._match(TokenType.INTO) 2262 buckets = self._parse_number() 2263 self._match_text_seq("BUCKETS") 2264 2265 return self.expression( 2266 exp.ClusteredByProperty, 2267 expressions=expressions, 2268 sorted_by=sorted_by, 2269 buckets=buckets, 2270 ) 2271 2272 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2273 if not self._match_text_seq("GRANTS"): 2274 self._retreat(self._index - 1) 2275 return None 2276 2277 return self.expression(exp.CopyGrantsProperty) 2278 2279 def _parse_freespace(self) -> exp.FreespaceProperty: 2280 self._match(TokenType.EQ) 2281 return self.expression( 2282 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2283 ) 2284 2285 def _parse_mergeblockratio( 2286 self, no: bool = False, default: bool = False 2287 ) -> exp.MergeBlockRatioProperty: 2288 if self._match(TokenType.EQ): 2289 return self.expression( 2290 exp.MergeBlockRatioProperty, 2291 this=self._parse_number(), 2292 percent=self._match(TokenType.PERCENT), 2293 ) 2294 2295 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2296 2297 def _parse_datablocksize( 2298 self, 2299 default: t.Optional[bool] = None, 2300 minimum: t.Optional[bool] = None, 2301 maximum: t.Optional[bool] = None, 2302 ) -> exp.DataBlocksizeProperty: 2303 self._match(TokenType.EQ) 2304 size = self._parse_number() 2305 2306 units = None 2307 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2308 units = self._prev.text 2309 2310 return self.expression( 2311 exp.DataBlocksizeProperty, 2312 size=size, 2313 units=units, 2314 default=default, 2315 minimum=minimum, 2316 maximum=maximum, 2317 ) 2318 2319 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2320 self._match(TokenType.EQ) 2321 always = self._match_text_seq("ALWAYS") 2322 manual = self._match_text_seq("MANUAL") 2323 never = 
self._match_text_seq("NEVER") 2324 default = self._match_text_seq("DEFAULT") 2325 2326 autotemp = None 2327 if self._match_text_seq("AUTOTEMP"): 2328 autotemp = self._parse_schema() 2329 2330 return self.expression( 2331 exp.BlockCompressionProperty, 2332 always=always, 2333 manual=manual, 2334 never=never, 2335 default=default, 2336 autotemp=autotemp, 2337 ) 2338 2339 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2340 index = self._index 2341 no = self._match_text_seq("NO") 2342 concurrent = self._match_text_seq("CONCURRENT") 2343 2344 if not self._match_text_seq("ISOLATED", "LOADING"): 2345 self._retreat(index) 2346 return None 2347 2348 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2349 return self.expression( 2350 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2351 ) 2352 2353 def _parse_locking(self) -> exp.LockingProperty: 2354 if self._match(TokenType.TABLE): 2355 kind = "TABLE" 2356 elif self._match(TokenType.VIEW): 2357 kind = "VIEW" 2358 elif self._match(TokenType.ROW): 2359 kind = "ROW" 2360 elif self._match_text_seq("DATABASE"): 2361 kind = "DATABASE" 2362 else: 2363 kind = None 2364 2365 if kind in ("DATABASE", "TABLE", "VIEW"): 2366 this = self._parse_table_parts() 2367 else: 2368 this = None 2369 2370 if self._match(TokenType.FOR): 2371 for_or_in = "FOR" 2372 elif self._match(TokenType.IN): 2373 for_or_in = "IN" 2374 else: 2375 for_or_in = None 2376 2377 if self._match_text_seq("ACCESS"): 2378 lock_type = "ACCESS" 2379 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2380 lock_type = "EXCLUSIVE" 2381 elif self._match_text_seq("SHARE"): 2382 lock_type = "SHARE" 2383 elif self._match_text_seq("READ"): 2384 lock_type = "READ" 2385 elif self._match_text_seq("WRITE"): 2386 lock_type = "WRITE" 2387 elif self._match_text_seq("CHECKSUM"): 2388 lock_type = "CHECKSUM" 2389 else: 2390 lock_type = None 2391 2392 override = self._match_text_seq("OVERRIDE") 2393 2394 return self.expression( 2395 exp.LockingProperty, 2396 this=this, 2397 kind=kind, 2398 for_or_in=for_or_in, 2399 lock_type=lock_type, 2400 override=override, 2401 ) 2402 2403 def _parse_partition_by(self) -> t.List[exp.Expression]: 2404 if self._match(TokenType.PARTITION_BY): 2405 return self._parse_csv(self._parse_assignment) 2406 return [] 2407 2408 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2409 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2410 if self._match_text_seq("MINVALUE"): 2411 return exp.var("MINVALUE") 2412 if self._match_text_seq("MAXVALUE"): 2413 return exp.var("MAXVALUE") 2414 return self._parse_bitwise() 2415 2416 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2417 expression = None 2418 from_expressions = None 2419 to_expressions = None 2420 2421 if self._match(TokenType.IN): 2422 this = self._parse_wrapped_csv(self._parse_bitwise) 2423 elif self._match(TokenType.FROM): 2424 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2425 self._match_text_seq("TO") 2426 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2427 elif self._match_text_seq("WITH", "(", "MODULUS"): 2428 this = self._parse_number() 2429 self._match_text_seq(",", "REMAINDER") 2430 expression = self._parse_number() 2431 self._match_r_paren() 2432 else: 2433 self.raise_error("Failed to parse partition bound spec.") 2434 2435 return self.expression( 2436 exp.PartitionBoundSpec, 2437 this=this, 2438 expression=expression, 2439 
from_expressions=from_expressions, 2440 to_expressions=to_expressions, 2441 ) 2442 2443 # https://www.postgresql.org/docs/current/sql-createtable.html 2444 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2445 if not self._match_text_seq("OF"): 2446 self._retreat(self._index - 1) 2447 return None 2448 2449 this = self._parse_table(schema=True) 2450 2451 if self._match(TokenType.DEFAULT): 2452 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2453 elif self._match_text_seq("FOR", "VALUES"): 2454 expression = self._parse_partition_bound_spec() 2455 else: 2456 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2457 2458 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2459 2460 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2461 self._match(TokenType.EQ) 2462 return self.expression( 2463 exp.PartitionedByProperty, 2464 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2465 ) 2466 2467 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2468 if self._match_text_seq("AND", "STATISTICS"): 2469 statistics = True 2470 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2471 statistics = False 2472 else: 2473 statistics = None 2474 2475 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2476 2477 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2478 if self._match_text_seq("SQL"): 2479 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2480 return None 2481 2482 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2483 if self._match_text_seq("SQL", "DATA"): 2484 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2485 return None 2486 2487 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2488 if self._match_text_seq("PRIMARY", "INDEX"): 2489 return exp.NoPrimaryIndexProperty() 2490 if self._match_text_seq("SQL"): 2491 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2492 return None 2493 2494 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2495 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2496 return exp.OnCommitProperty() 2497 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2498 return exp.OnCommitProperty(delete=True) 2499 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2500 2501 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2502 if self._match_text_seq("SQL", "DATA"): 2503 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2504 return None 2505 2506 def _parse_distkey(self) -> exp.DistKeyProperty: 2507 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2508 2509 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2510 table = self._parse_table(schema=True) 2511 2512 options = [] 2513 while self._match_texts(("INCLUDING", "EXCLUDING")): 2514 this = self._prev.text.upper() 2515 2516 id_var = self._parse_id_var() 2517 if not id_var: 2518 return None 2519 2520 options.append( 2521 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2522 ) 2523 2524 return self.expression(exp.LikeProperty, this=table, expressions=options) 2525 2526 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2527 return self.expression( 2528 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2529 ) 2530 2531 def 
_parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2532 self._match(TokenType.EQ) 2533 return self.expression( 2534 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2535 ) 2536 2537 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2538 self._match_text_seq("WITH", "CONNECTION") 2539 return self.expression( 2540 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2541 ) 2542 2543 def _parse_returns(self) -> exp.ReturnsProperty: 2544 value: t.Optional[exp.Expression] 2545 null = None 2546 is_table = self._match(TokenType.TABLE) 2547 2548 if is_table: 2549 if self._match(TokenType.LT): 2550 value = self.expression( 2551 exp.Schema, 2552 this="TABLE", 2553 expressions=self._parse_csv(self._parse_struct_types), 2554 ) 2555 if not self._match(TokenType.GT): 2556 self.raise_error("Expecting >") 2557 else: 2558 value = self._parse_schema(exp.var("TABLE")) 2559 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2560 null = True 2561 value = None 2562 else: 2563 value = self._parse_types() 2564 2565 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2566 2567 def _parse_describe(self) -> exp.Describe: 2568 kind = self._match_set(self.CREATABLES) and self._prev.text 2569 style = self._match_texts(("EXTENDED", "FORMATTED", "HISTORY")) and self._prev.text.upper() 2570 if self._match(TokenType.DOT): 2571 style = None 2572 self._retreat(self._index - 2) 2573 this = self._parse_table(schema=True) 2574 properties = self._parse_properties() 2575 expressions = properties.expressions if properties else None 2576 partition = self._parse_partition() 2577 return self.expression( 2578 exp.Describe, 2579 this=this, 2580 style=style, 2581 kind=kind, 2582 expressions=expressions, 2583 partition=partition, 2584 ) 2585 2586 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2587 kind = self._prev.text.upper() 2588 expressions = [] 2589 2590 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2591 if self._match(TokenType.WHEN): 2592 expression = self._parse_disjunction() 2593 self._match(TokenType.THEN) 2594 else: 2595 expression = None 2596 2597 else_ = self._match(TokenType.ELSE) 2598 2599 if not self._match(TokenType.INTO): 2600 return None 2601 2602 return self.expression( 2603 exp.ConditionalInsert, 2604 this=self.expression( 2605 exp.Insert, 2606 this=self._parse_table(schema=True), 2607 expression=self._parse_derived_table_values(), 2608 ), 2609 expression=expression, 2610 else_=else_, 2611 ) 2612 2613 expression = parse_conditional_insert() 2614 while expression is not None: 2615 expressions.append(expression) 2616 expression = parse_conditional_insert() 2617 2618 return self.expression( 2619 exp.MultitableInserts, 2620 kind=kind, 2621 comments=comments, 2622 expressions=expressions, 2623 source=self._parse_table(), 2624 ) 2625 2626 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2627 comments = ensure_list(self._prev_comments) 2628 hint = self._parse_hint() 2629 overwrite = self._match(TokenType.OVERWRITE) 2630 ignore = self._match(TokenType.IGNORE) 2631 local = self._match_text_seq("LOCAL") 2632 alternative = None 2633 is_function = None 2634 2635 if self._match_text_seq("DIRECTORY"): 2636 this: t.Optional[exp.Expression] = self.expression( 2637 exp.Directory, 2638 this=self._parse_var_or_string(), 2639 local=local, 2640 row_format=self._parse_row_format(match_row=True), 2641 ) 
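# The else branch below covers every non-DIRECTORY INSERT: Oracle-style multitable
# inserts (INSERT FIRST/ALL), the OR <alternative> clause, and the regular INTO target.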
2642 else: 2643 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2644 comments += ensure_list(self._prev_comments) 2645 return self._parse_multitable_inserts(comments) 2646 2647 if self._match(TokenType.OR): 2648 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2649 2650 self._match(TokenType.INTO) 2651 comments += ensure_list(self._prev_comments) 2652 self._match(TokenType.TABLE) 2653 is_function = self._match(TokenType.FUNCTION) 2654 2655 this = ( 2656 self._parse_table(schema=True, parse_partition=True) 2657 if not is_function 2658 else self._parse_function() 2659 ) 2660 2661 returning = self._parse_returning() 2662 2663 return self.expression( 2664 exp.Insert, 2665 comments=comments, 2666 hint=hint, 2667 is_function=is_function, 2668 this=this, 2669 stored=self._match_text_seq("STORED") and self._parse_stored(), 2670 by_name=self._match_text_seq("BY", "NAME"), 2671 exists=self._parse_exists(), 2672 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2673 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2674 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2675 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2676 conflict=self._parse_on_conflict(), 2677 returning=returning or self._parse_returning(), 2678 overwrite=overwrite, 2679 alternative=alternative, 2680 ignore=ignore, 2681 source=self._match(TokenType.TABLE) and self._parse_table(), 2682 ) 2683 2684 def _parse_kill(self) -> exp.Kill: 2685 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2686 2687 return self.expression( 2688 exp.Kill, 2689 this=self._parse_primary(), 2690 kind=kind, 2691 ) 2692 2693 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2694 conflict = self._match_text_seq("ON", "CONFLICT") 2695 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2696 2697 if not conflict and not duplicate: 2698 return None 2699 2700 conflict_keys = None 2701 constraint = None 2702 2703 if conflict: 2704 if self._match_text_seq("ON", "CONSTRAINT"): 2705 constraint = self._parse_id_var() 2706 elif self._match(TokenType.L_PAREN): 2707 conflict_keys = self._parse_csv(self._parse_id_var) 2708 self._match_r_paren() 2709 2710 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2711 if self._prev.token_type == TokenType.UPDATE: 2712 self._match(TokenType.SET) 2713 expressions = self._parse_csv(self._parse_equality) 2714 else: 2715 expressions = None 2716 2717 return self.expression( 2718 exp.OnConflict, 2719 duplicate=duplicate, 2720 expressions=expressions, 2721 action=action, 2722 conflict_keys=conflict_keys, 2723 constraint=constraint, 2724 ) 2725 2726 def _parse_returning(self) -> t.Optional[exp.Returning]: 2727 if not self._match(TokenType.RETURNING): 2728 return None 2729 return self.expression( 2730 exp.Returning, 2731 expressions=self._parse_csv(self._parse_expression), 2732 into=self._match(TokenType.INTO) and self._parse_table_part(), 2733 ) 2734 2735 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2736 if not self._match(TokenType.FORMAT): 2737 return None 2738 return self._parse_row_format() 2739 2740 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2741 index = self._index 2742 with_ = with_ or self._match_text_seq("WITH") 2743 2744 if not self._match(TokenType.SERDE_PROPERTIES): 2745 self._retreat(index) 2746 return 
None 2747 return self.expression( 2748 exp.SerdeProperties, 2749 **{ # type: ignore 2750 "expressions": self._parse_wrapped_properties(), 2751 "with": with_, 2752 }, 2753 ) 2754 2755 def _parse_row_format( 2756 self, match_row: bool = False 2757 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2758 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 2759 return None 2760 2761 if self._match_text_seq("SERDE"): 2762 this = self._parse_string() 2763 2764 serde_properties = self._parse_serde_properties() 2765 2766 return self.expression( 2767 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 2768 ) 2769 2770 self._match_text_seq("DELIMITED") 2771 2772 kwargs = {} 2773 2774 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 2775 kwargs["fields"] = self._parse_string() 2776 if self._match_text_seq("ESCAPED", "BY"): 2777 kwargs["escaped"] = self._parse_string() 2778 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 2779 kwargs["collection_items"] = self._parse_string() 2780 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 2781 kwargs["map_keys"] = self._parse_string() 2782 if self._match_text_seq("LINES", "TERMINATED", "BY"): 2783 kwargs["lines"] = self._parse_string() 2784 if self._match_text_seq("NULL", "DEFINED", "AS"): 2785 kwargs["null"] = self._parse_string() 2786 2787 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 2788 2789 def _parse_load(self) -> exp.LoadData | exp.Command: 2790 if self._match_text_seq("DATA"): 2791 local = self._match_text_seq("LOCAL") 2792 self._match_text_seq("INPATH") 2793 inpath = self._parse_string() 2794 overwrite = self._match(TokenType.OVERWRITE) 2795 self._match_pair(TokenType.INTO, TokenType.TABLE) 2796 2797 return self.expression( 2798 exp.LoadData, 2799 this=self._parse_table(schema=True), 2800 local=local, 2801 overwrite=overwrite, 2802 inpath=inpath, 2803 partition=self._parse_partition(), 2804 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 2805 serde=self._match_text_seq("SERDE") and self._parse_string(), 2806 ) 2807 return self._parse_as_command(self._prev) 2808 2809 def _parse_delete(self) -> exp.Delete: 2810 # This handles MySQL's "Multiple-Table Syntax" 2811 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 2812 tables = None 2813 comments = self._prev_comments 2814 if not self._match(TokenType.FROM, advance=False): 2815 tables = self._parse_csv(self._parse_table) or None 2816 2817 returning = self._parse_returning() 2818 2819 return self.expression( 2820 exp.Delete, 2821 comments=comments, 2822 tables=tables, 2823 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 2824 using=self._match(TokenType.USING) and self._parse_table(joins=True), 2825 cluster=self._match(TokenType.ON) and self._parse_on_property(), 2826 where=self._parse_where(), 2827 returning=returning or self._parse_returning(), 2828 limit=self._parse_limit(), 2829 ) 2830 2831 def _parse_update(self) -> exp.Update: 2832 comments = self._prev_comments 2833 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 2834 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 2835 returning = self._parse_returning() 2836 return self.expression( 2837 exp.Update, 2838 comments=comments, 2839 **{ # type: ignore 2840 "this": this, 2841 "expressions": expressions, 2842 "from": self._parse_from(joins=True), 2843 "where": self._parse_where(), 2844 "returning": returning or 
self._parse_returning(), 2845 "order": self._parse_order(), 2846 "limit": self._parse_limit(), 2847 }, 2848 ) 2849 2850 def _parse_uncache(self) -> exp.Uncache: 2851 if not self._match(TokenType.TABLE): 2852 self.raise_error("Expecting TABLE after UNCACHE") 2853 2854 return self.expression( 2855 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 2856 ) 2857 2858 def _parse_cache(self) -> exp.Cache: 2859 lazy = self._match_text_seq("LAZY") 2860 self._match(TokenType.TABLE) 2861 table = self._parse_table(schema=True) 2862 2863 options = [] 2864 if self._match_text_seq("OPTIONS"): 2865 self._match_l_paren() 2866 k = self._parse_string() 2867 self._match(TokenType.EQ) 2868 v = self._parse_string() 2869 options = [k, v] 2870 self._match_r_paren() 2871 2872 self._match(TokenType.ALIAS) 2873 return self.expression( 2874 exp.Cache, 2875 this=table, 2876 lazy=lazy, 2877 options=options, 2878 expression=self._parse_select(nested=True), 2879 ) 2880 2881 def _parse_partition(self) -> t.Optional[exp.Partition]: 2882 if not self._match(TokenType.PARTITION): 2883 return None 2884 2885 return self.expression( 2886 exp.Partition, expressions=self._parse_wrapped_csv(self._parse_assignment) 2887 ) 2888 2889 def _parse_value(self) -> t.Optional[exp.Tuple]: 2890 if self._match(TokenType.L_PAREN): 2891 expressions = self._parse_csv(self._parse_expression) 2892 self._match_r_paren() 2893 return self.expression(exp.Tuple, expressions=expressions) 2894 2895 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 2896 expression = self._parse_expression() 2897 if expression: 2898 return self.expression(exp.Tuple, expressions=[expression]) 2899 return None 2900 2901 def _parse_projections(self) -> t.List[exp.Expression]: 2902 return self._parse_expressions() 2903 2904 def _parse_select( 2905 self, 2906 nested: bool = False, 2907 table: bool = False, 2908 parse_subquery_alias: bool = True, 2909 parse_set_operation: bool = True, 2910 ) -> t.Optional[exp.Expression]: 2911 cte = self._parse_with() 2912 2913 if cte: 2914 this = self._parse_statement() 2915 2916 if not this: 2917 self.raise_error("Failed to parse any statement following CTE") 2918 return cte 2919 2920 if "with" in this.arg_types: 2921 this.set("with", cte) 2922 else: 2923 self.raise_error(f"{this.key} does not support CTE") 2924 this = cte 2925 2926 return this 2927 2928 # duckdb supports leading with FROM x 2929 from_ = self._parse_from() if self._match(TokenType.FROM, advance=False) else None 2930 2931 if self._match(TokenType.SELECT): 2932 comments = self._prev_comments 2933 2934 hint = self._parse_hint() 2935 2936 if self._next and not self._next.token_type == TokenType.DOT: 2937 all_ = self._match(TokenType.ALL) 2938 distinct = self._match_set(self.DISTINCT_TOKENS) 2939 else: 2940 all_, distinct = None, None 2941 2942 kind = ( 2943 self._match(TokenType.ALIAS) 2944 and self._match_texts(("STRUCT", "VALUE")) 2945 and self._prev.text.upper() 2946 ) 2947 2948 if distinct: 2949 distinct = self.expression( 2950 exp.Distinct, 2951 on=self._parse_value() if self._match(TokenType.ON) else None, 2952 ) 2953 2954 if all_ and distinct: 2955 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 2956 2957 limit = self._parse_limit(top=True) 2958 projections = self._parse_projections() 2959 2960 this = self.expression( 2961 exp.Select, 2962 kind=kind, 2963 hint=hint, 2964 distinct=distinct, 2965 expressions=projections, 2966 limit=limit, 2967 ) 2968 this.comments = comments 2969 2970 into = 
self._parse_into() 2971 if into: 2972 this.set("into", into) 2973 2974 if not from_: 2975 from_ = self._parse_from() 2976 2977 if from_: 2978 this.set("from", from_) 2979 2980 this = self._parse_query_modifiers(this) 2981 elif (table or nested) and self._match(TokenType.L_PAREN): 2982 if self._match(TokenType.PIVOT): 2983 this = self._parse_simplified_pivot() 2984 elif self._match(TokenType.FROM): 2985 this = exp.select("*").from_( 2986 t.cast(exp.From, self._parse_from(skip_from_token=True)) 2987 ) 2988 else: 2989 this = ( 2990 self._parse_table() 2991 if table 2992 else self._parse_select(nested=True, parse_set_operation=False) 2993 ) 2994 2995 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 2996 # in case a modifier (e.g. join) is following 2997 if table and isinstance(this, exp.Values) and this.alias: 2998 alias = this.args["alias"].pop() 2999 this = exp.Table(this=this, alias=alias) 3000 3001 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3002 3003 self._match_r_paren() 3004 3005 # We return early here so that the UNION isn't attached to the subquery by the 3006 # following call to _parse_set_operations, but instead becomes the parent node 3007 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3008 elif self._match(TokenType.VALUES, advance=False): 3009 this = self._parse_derived_table_values() 3010 elif from_: 3011 this = exp.select("*").from_(from_.this, copy=False) 3012 elif self._match(TokenType.SUMMARIZE): 3013 table = self._match(TokenType.TABLE) 3014 this = self._parse_select() or self._parse_string() or self._parse_table() 3015 return self.expression(exp.Summarize, this=this, table=table) 3016 elif self._match(TokenType.DESCRIBE): 3017 this = self._parse_describe() 3018 elif self._match_text_seq("STREAM"): 3019 this = self.expression(exp.Stream, this=self._parse_function()) 3020 else: 3021 this = None 3022 3023 return self._parse_set_operations(this) if parse_set_operation else this 3024 3025 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3026 if not skip_with_token and not self._match(TokenType.WITH): 3027 return None 3028 3029 comments = self._prev_comments 3030 recursive = self._match(TokenType.RECURSIVE) 3031 3032 expressions = [] 3033 while True: 3034 expressions.append(self._parse_cte()) 3035 3036 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3037 break 3038 else: 3039 self._match(TokenType.WITH) 3040 3041 return self.expression( 3042 exp.With, comments=comments, expressions=expressions, recursive=recursive 3043 ) 3044 3045 def _parse_cte(self) -> exp.CTE: 3046 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3047 if not alias or not alias.this: 3048 self.raise_error("Expected CTE to have alias") 3049 3050 self._match(TokenType.ALIAS) 3051 comments = self._prev_comments 3052 3053 if self._match_text_seq("NOT", "MATERIALIZED"): 3054 materialized = False 3055 elif self._match_text_seq("MATERIALIZED"): 3056 materialized = True 3057 else: 3058 materialized = None 3059 3060 return self.expression( 3061 exp.CTE, 3062 this=self._parse_wrapped(self._parse_statement), 3063 alias=alias, 3064 materialized=materialized, 3065 comments=comments, 3066 ) 3067 3068 def _parse_table_alias( 3069 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3070 ) -> t.Optional[exp.TableAlias]: 3071 any_token = self._match(TokenType.ALIAS) 3072 alias = ( 3073 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3074 or 
self._parse_string_as_identifier() 3075 ) 3076 3077 index = self._index 3078 if self._match(TokenType.L_PAREN): 3079 columns = self._parse_csv(self._parse_function_parameter) 3080 self._match_r_paren() if columns else self._retreat(index) 3081 else: 3082 columns = None 3083 3084 if not alias and not columns: 3085 return None 3086 3087 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3088 3089 # We bubble up comments from the Identifier to the TableAlias 3090 if isinstance(alias, exp.Identifier): 3091 table_alias.add_comments(alias.pop_comments()) 3092 3093 return table_alias 3094 3095 def _parse_subquery( 3096 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3097 ) -> t.Optional[exp.Subquery]: 3098 if not this: 3099 return None 3100 3101 return self.expression( 3102 exp.Subquery, 3103 this=this, 3104 pivots=self._parse_pivots(), 3105 alias=self._parse_table_alias() if parse_alias else None, 3106 sample=self._parse_table_sample(), 3107 ) 3108 3109 def _implicit_unnests_to_explicit(self, this: E) -> E: 3110 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3111 3112 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3113 for i, join in enumerate(this.args.get("joins") or []): 3114 table = join.this 3115 normalized_table = table.copy() 3116 normalized_table.meta["maybe_column"] = True 3117 normalized_table = _norm(normalized_table, dialect=self.dialect) 3118 3119 if isinstance(table, exp.Table) and not join.args.get("on"): 3120 if normalized_table.parts[0].name in refs: 3121 table_as_column = table.to_column() 3122 unnest = exp.Unnest(expressions=[table_as_column]) 3123 3124 # Table.to_column creates a parent Alias node that we want to convert to 3125 # a TableAlias and attach to the Unnest, so it matches the parser's output 3126 if isinstance(table.args.get("alias"), exp.TableAlias): 3127 table_as_column.replace(table_as_column.this) 3128 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3129 3130 table.replace(unnest) 3131 3132 refs.add(normalized_table.alias_or_name) 3133 3134 return this 3135 3136 def _parse_query_modifiers( 3137 self, this: t.Optional[exp.Expression] 3138 ) -> t.Optional[exp.Expression]: 3139 if isinstance(this, (exp.Query, exp.Table)): 3140 for join in self._parse_joins(): 3141 this.append("joins", join) 3142 for lateral in iter(self._parse_lateral, None): 3143 this.append("laterals", lateral) 3144 3145 while True: 3146 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3147 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3148 key, expression = parser(self) 3149 3150 if expression: 3151 this.set(key, expression) 3152 if key == "limit": 3153 offset = expression.args.pop("offset", None) 3154 3155 if offset: 3156 offset = exp.Offset(expression=offset) 3157 this.set("offset", offset) 3158 3159 limit_by_expressions = expression.expressions 3160 expression.set("expressions", None) 3161 offset.set("expressions", limit_by_expressions) 3162 continue 3163 break 3164 3165 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3166 this = self._implicit_unnests_to_explicit(this) 3167 3168 return this 3169 3170 def _parse_hint(self) -> t.Optional[exp.Hint]: 3171 if self._match(TokenType.HINT): 3172 hints = [] 3173 for hint in iter( 3174 lambda: self._parse_csv( 3175 lambda: self._parse_function() or self._parse_var(upper=True) 3176 ), 3177 [], 3178 ): 3179 hints.extend(hint) 3180 3181 if not 
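        # e.g. _parse_table_alias also accepts a parenthesized column list, so in
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT * FROM (VALUES (1, 2)) AS t(a, b)")
        #
        # the alias is parsed into exp.TableAlias(this=t, columns=[a, b]), with any
        # comments bubbled up from the Identifier as described above.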
self._match_pair(TokenType.STAR, TokenType.SLASH): 3182 self.raise_error("Expected */ after HINT") 3183 3184 return self.expression(exp.Hint, expressions=hints) 3185 3186 return None 3187 3188 def _parse_into(self) -> t.Optional[exp.Into]: 3189 if not self._match(TokenType.INTO): 3190 return None 3191 3192 temp = self._match(TokenType.TEMPORARY) 3193 unlogged = self._match_text_seq("UNLOGGED") 3194 self._match(TokenType.TABLE) 3195 3196 return self.expression( 3197 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3198 ) 3199 3200 def _parse_from( 3201 self, joins: bool = False, skip_from_token: bool = False 3202 ) -> t.Optional[exp.From]: 3203 if not skip_from_token and not self._match(TokenType.FROM): 3204 return None 3205 3206 return self.expression( 3207 exp.From, comments=self._prev_comments, this=self._parse_table(joins=joins) 3208 ) 3209 3210 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3211 return self.expression( 3212 exp.MatchRecognizeMeasure, 3213 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3214 this=self._parse_expression(), 3215 ) 3216 3217 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3218 if not self._match(TokenType.MATCH_RECOGNIZE): 3219 return None 3220 3221 self._match_l_paren() 3222 3223 partition = self._parse_partition_by() 3224 order = self._parse_order() 3225 3226 measures = ( 3227 self._parse_csv(self._parse_match_recognize_measure) 3228 if self._match_text_seq("MEASURES") 3229 else None 3230 ) 3231 3232 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3233 rows = exp.var("ONE ROW PER MATCH") 3234 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3235 text = "ALL ROWS PER MATCH" 3236 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3237 text += " SHOW EMPTY MATCHES" 3238 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3239 text += " OMIT EMPTY MATCHES" 3240 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3241 text += " WITH UNMATCHED ROWS" 3242 rows = exp.var(text) 3243 else: 3244 rows = None 3245 3246 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3247 text = "AFTER MATCH SKIP" 3248 if self._match_text_seq("PAST", "LAST", "ROW"): 3249 text += " PAST LAST ROW" 3250 elif self._match_text_seq("TO", "NEXT", "ROW"): 3251 text += " TO NEXT ROW" 3252 elif self._match_text_seq("TO", "FIRST"): 3253 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3254 elif self._match_text_seq("TO", "LAST"): 3255 text += f" TO LAST {self._advance_any().text}" # type: ignore 3256 after = exp.var(text) 3257 else: 3258 after = None 3259 3260 if self._match_text_seq("PATTERN"): 3261 self._match_l_paren() 3262 3263 if not self._curr: 3264 self.raise_error("Expecting )", self._curr) 3265 3266 paren = 1 3267 start = self._curr 3268 3269 while self._curr and paren > 0: 3270 if self._curr.token_type == TokenType.L_PAREN: 3271 paren += 1 3272 if self._curr.token_type == TokenType.R_PAREN: 3273 paren -= 1 3274 3275 end = self._prev 3276 self._advance() 3277 3278 if paren > 0: 3279 self.raise_error("Expecting )", self._curr) 3280 3281 pattern = exp.var(self._find_sql(start, end)) 3282 else: 3283 pattern = None 3284 3285 define = ( 3286 self._parse_csv(self._parse_name_as_expression) 3287 if self._match_text_seq("DEFINE") 3288 else None 3289 ) 3290 3291 self._match_r_paren() 3292 3293 return self.expression( 3294 exp.MatchRecognize, 3295 partition_by=partition, 3296 order=order, 3297 measures=measures, 3298 rows=rows, 3299 after=after, 
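            # A sketch of the clause this method recognizes (Snowflake syntax,
            # public API only); the PATTERN body is captured verbatim by the
            # paren-counting loop above and stored as a raw exp.Var:
            #
            #   >>> import sqlglot
            #   >>> sqlglot.parse_one(
            #   ...     "SELECT * FROM t MATCH_RECOGNIZE ("
            #   ...     "  PARTITION BY id ORDER BY ts"
            #   ...     "  PATTERN (A B+)"
            #   ...     "  DEFINE A AS price > 10, B AS price <= 10"
            #   ...     ") AS mr",
            #   ...     read="snowflake",
            #   ... )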
3300 pattern=pattern, 3301 define=define, 3302 alias=self._parse_table_alias(), 3303 ) 3304 3305 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3306 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3307 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3308 cross_apply = False 3309 3310 if cross_apply is not None: 3311 this = self._parse_select(table=True) 3312 view = None 3313 outer = None 3314 elif self._match(TokenType.LATERAL): 3315 this = self._parse_select(table=True) 3316 view = self._match(TokenType.VIEW) 3317 outer = self._match(TokenType.OUTER) 3318 else: 3319 return None 3320 3321 if not this: 3322 this = ( 3323 self._parse_unnest() 3324 or self._parse_function() 3325 or self._parse_id_var(any_token=False) 3326 ) 3327 3328 while self._match(TokenType.DOT): 3329 this = exp.Dot( 3330 this=this, 3331 expression=self._parse_function() or self._parse_id_var(any_token=False), 3332 ) 3333 3334 if view: 3335 table = self._parse_id_var(any_token=False) 3336 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3337 table_alias: t.Optional[exp.TableAlias] = self.expression( 3338 exp.TableAlias, this=table, columns=columns 3339 ) 3340 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3341 # We move the alias from the lateral's child node to the lateral itself 3342 table_alias = this.args["alias"].pop() 3343 else: 3344 table_alias = self._parse_table_alias() 3345 3346 return self.expression( 3347 exp.Lateral, 3348 this=this, 3349 view=view, 3350 outer=outer, 3351 alias=table_alias, 3352 cross_apply=cross_apply, 3353 ) 3354 3355 def _parse_join_parts( 3356 self, 3357 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3358 return ( 3359 self._match_set(self.JOIN_METHODS) and self._prev, 3360 self._match_set(self.JOIN_SIDES) and self._prev, 3361 self._match_set(self.JOIN_KINDS) and self._prev, 3362 ) 3363 3364 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3365 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3366 this = self._parse_column() 3367 if isinstance(this, exp.Column): 3368 return this.this 3369 return this 3370 3371 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3372 3373 def _parse_join( 3374 self, skip_join_token: bool = False, parse_bracket: bool = False 3375 ) -> t.Optional[exp.Join]: 3376 if self._match(TokenType.COMMA): 3377 return self.expression(exp.Join, this=self._parse_table()) 3378 3379 index = self._index 3380 method, side, kind = self._parse_join_parts() 3381 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3382 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3383 3384 if not skip_join_token and not join: 3385 self._retreat(index) 3386 kind = None 3387 method = None 3388 side = None 3389 3390 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3391 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3392 3393 if not skip_join_token and not join and not outer_apply and not cross_apply: 3394 return None 3395 3396 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3397 3398 if method: 3399 kwargs["method"] = method.text 3400 if side: 3401 kwargs["side"] = side.text 3402 if kind: 3403 kwargs["kind"] = kind.text 3404 if hint: 3405 kwargs["hint"] = hint 3406 3407 if self._match(TokenType.MATCH_CONDITION): 3408 kwargs["match_condition"] = 
self._parse_wrapped(self._parse_comparison) 3409 3410 if self._match(TokenType.ON): 3411 kwargs["on"] = self._parse_assignment() 3412 elif self._match(TokenType.USING): 3413 kwargs["using"] = self._parse_using_identifiers() 3414 elif ( 3415 not (outer_apply or cross_apply) 3416 and not isinstance(kwargs["this"], exp.Unnest) 3417 and not (kind and kind.token_type == TokenType.CROSS) 3418 ): 3419 index = self._index 3420 joins: t.Optional[list] = list(self._parse_joins()) 3421 3422 if joins and self._match(TokenType.ON): 3423 kwargs["on"] = self._parse_assignment() 3424 elif joins and self._match(TokenType.USING): 3425 kwargs["using"] = self._parse_using_identifiers() 3426 else: 3427 joins = None 3428 self._retreat(index) 3429 3430 kwargs["this"].set("joins", joins if joins else None) 3431 3432 comments = [c for token in (method, side, kind) if token for c in token.comments] 3433 return self.expression(exp.Join, comments=comments, **kwargs) 3434 3435 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3436 this = self._parse_assignment() 3437 3438 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3439 return this 3440 3441 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3442 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3443 3444 return this 3445 3446 def _parse_index_params(self) -> exp.IndexParameters: 3447 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3448 3449 if self._match(TokenType.L_PAREN, advance=False): 3450 columns = self._parse_wrapped_csv(self._parse_with_operator) 3451 else: 3452 columns = None 3453 3454 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3455 partition_by = self._parse_partition_by() 3456 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3457 tablespace = ( 3458 self._parse_var(any_token=True) 3459 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3460 else None 3461 ) 3462 where = self._parse_where() 3463 3464 on = self._parse_field() if self._match(TokenType.ON) else None 3465 3466 return self.expression( 3467 exp.IndexParameters, 3468 using=using, 3469 columns=columns, 3470 include=include, 3471 partition_by=partition_by, 3472 where=where, 3473 with_storage=with_storage, 3474 tablespace=tablespace, 3475 on=on, 3476 ) 3477 3478 def _parse_index( 3479 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3480 ) -> t.Optional[exp.Index]: 3481 if index or anonymous: 3482 unique = None 3483 primary = None 3484 amp = None 3485 3486 self._match(TokenType.ON) 3487 self._match(TokenType.TABLE) # hive 3488 table = self._parse_table_parts(schema=True) 3489 else: 3490 unique = self._match(TokenType.UNIQUE) 3491 primary = self._match_text_seq("PRIMARY") 3492 amp = self._match_text_seq("AMP") 3493 3494 if not self._match(TokenType.INDEX): 3495 return None 3496 3497 index = self._parse_id_var() 3498 table = None 3499 3500 params = self._parse_index_params() 3501 3502 return self.expression( 3503 exp.Index, 3504 this=index, 3505 table=table, 3506 unique=unique, 3507 primary=primary, 3508 amp=amp, 3509 params=params, 3510 ) 3511 3512 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3513 hints: t.List[exp.Expression] = [] 3514 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3515 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3516 hints.append( 3517 self.expression( 3518 exp.WithTableHint, 3519 
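        # e.g. for the USING branch above (a sketch, public API only):
        #
        #   >>> import sqlglot
        #   >>> join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").args["joins"][0]
        #   >>> join.args["using"]  # [exp.Identifier(this=id)]
        #
        # _parse_using_identifiers unwraps plain exp.Column nodes down to their
        # underlying identifiers, as defined above.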
expressions=self._parse_csv( 3520 lambda: self._parse_function() or self._parse_var(any_token=True) 3521 ), 3522 ) 3523 ) 3524 self._match_r_paren() 3525 else: 3526 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3527 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3528 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3529 3530 self._match_set((TokenType.INDEX, TokenType.KEY)) 3531 if self._match(TokenType.FOR): 3532 hint.set("target", self._advance_any() and self._prev.text.upper()) 3533 3534 hint.set("expressions", self._parse_wrapped_id_vars()) 3535 hints.append(hint) 3536 3537 return hints or None 3538 3539 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3540 return ( 3541 (not schema and self._parse_function(optional_parens=False)) 3542 or self._parse_id_var(any_token=False) 3543 or self._parse_string_as_identifier() 3544 or self._parse_placeholder() 3545 ) 3546 3547 def _parse_table_parts( 3548 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3549 ) -> exp.Table: 3550 catalog = None 3551 db = None 3552 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3553 3554 while self._match(TokenType.DOT): 3555 if catalog: 3556 # This allows nesting the table in arbitrarily many dot expressions if needed 3557 table = self.expression( 3558 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3559 ) 3560 else: 3561 catalog = db 3562 db = table 3563 # "" used for tsql FROM a..b case 3564 table = self._parse_table_part(schema=schema) or "" 3565 3566 if ( 3567 wildcard 3568 and self._is_connected() 3569 and (isinstance(table, exp.Identifier) or not table) 3570 and self._match(TokenType.STAR) 3571 ): 3572 if isinstance(table, exp.Identifier): 3573 table.args["this"] += "*" 3574 else: 3575 table = exp.Identifier(this="*") 3576 3577 # We bubble up comments from the Identifier to the Table 3578 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3579 3580 if is_db_reference: 3581 catalog = db 3582 db = table 3583 table = None 3584 3585 if not table and not is_db_reference: 3586 self.raise_error(f"Expected table name but got {self._curr}") 3587 if not db and is_db_reference: 3588 self.raise_error(f"Expected database name but got {self._curr}") 3589 3590 table = self.expression( 3591 exp.Table, 3592 comments=comments, 3593 this=table, 3594 db=db, 3595 catalog=catalog, 3596 ) 3597 3598 changes = self._parse_changes() 3599 if changes: 3600 table.set("changes", changes) 3601 3602 at_before = self._parse_historical_data() 3603 if at_before: 3604 table.set("when", at_before) 3605 3606 pivots = self._parse_pivots() 3607 if pivots: 3608 table.set("pivots", pivots) 3609 3610 return table 3611 3612 def _parse_table( 3613 self, 3614 schema: bool = False, 3615 joins: bool = False, 3616 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 3617 parse_bracket: bool = False, 3618 is_db_reference: bool = False, 3619 parse_partition: bool = False, 3620 ) -> t.Optional[exp.Expression]: 3621 lateral = self._parse_lateral() 3622 if lateral: 3623 return lateral 3624 3625 unnest = self._parse_unnest() 3626 if unnest: 3627 return unnest 3628 3629 values = self._parse_derived_table_values() 3630 if values: 3631 return values 3632 3633 subquery = self._parse_select(table=True) 3634 if subquery: 3635 if not subquery.args.get("pivots"): 3636 subquery.set("pivots", self._parse_pivots()) 3637 return subquery 3638 3639 bracket = parse_bracket and 
self._parse_bracket(None) 3640 bracket = self.expression(exp.Table, this=bracket) if bracket else None 3641 3642 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 3643 self._parse_table 3644 ) 3645 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 3646 3647 only = self._match(TokenType.ONLY) 3648 3649 this = t.cast( 3650 exp.Expression, 3651 bracket 3652 or rows_from 3653 or self._parse_bracket( 3654 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 3655 ), 3656 ) 3657 3658 if only: 3659 this.set("only", only) 3660 3661 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 3662 self._match_text_seq("*") 3663 3664 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 3665 if parse_partition and self._match(TokenType.PARTITION, advance=False): 3666 this.set("partition", self._parse_partition()) 3667 3668 if schema: 3669 return self._parse_schema(this=this) 3670 3671 version = self._parse_version() 3672 3673 if version: 3674 this.set("version", version) 3675 3676 if self.dialect.ALIAS_POST_TABLESAMPLE: 3677 this.set("sample", self._parse_table_sample()) 3678 3679 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3680 if alias: 3681 this.set("alias", alias) 3682 3683 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 3684 return self.expression( 3685 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 3686 ) 3687 3688 this.set("hints", self._parse_table_hints()) 3689 3690 if not this.args.get("pivots"): 3691 this.set("pivots", self._parse_pivots()) 3692 3693 if not self.dialect.ALIAS_POST_TABLESAMPLE: 3694 this.set("sample", self._parse_table_sample()) 3695 3696 if joins: 3697 for join in self._parse_joins(): 3698 this.append("joins", join) 3699 3700 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 3701 this.set("ordinality", True) 3702 this.set("alias", self._parse_table_alias()) 3703 3704 return this 3705 3706 def _parse_version(self) -> t.Optional[exp.Version]: 3707 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 3708 this = "TIMESTAMP" 3709 elif self._match(TokenType.VERSION_SNAPSHOT): 3710 this = "VERSION" 3711 else: 3712 return None 3713 3714 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 3715 kind = self._prev.text.upper() 3716 start = self._parse_bitwise() 3717 self._match_texts(("TO", "AND")) 3718 end = self._parse_bitwise() 3719 expression: t.Optional[exp.Expression] = self.expression( 3720 exp.Tuple, expressions=[start, end] 3721 ) 3722 elif self._match_text_seq("CONTAINED", "IN"): 3723 kind = "CONTAINED IN" 3724 expression = self.expression( 3725 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 3726 ) 3727 elif self._match(TokenType.ALL): 3728 kind = "ALL" 3729 expression = None 3730 else: 3731 self._match_text_seq("AS", "OF") 3732 kind = "AS OF" 3733 expression = self._parse_type() 3734 3735 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 3736 3737 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 3738 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 3739 index = self._index 3740 historical_data = None 3741 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 3742 this = self._prev.text.upper() 3743 kind = ( 3744 self._match(TokenType.L_PAREN) 3745 and self._match_texts(self.HISTORICAL_DATA_KIND) 3746 and self._prev.text.upper() 3747 ) 3748 expression = 
self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
        else:
            self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
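        # e.g. the BUCKET branch above handles Hive-style sampling (a sketch,
        # public API only):
        #
        #   >>> import sqlglot
        #   >>> sample = sqlglot.parse_one(
        #   ...     "SELECT * FROM t TABLESAMPLE (BUCKET 1 OUT OF 4 ON id)", read="hive"
        #   ... ).find(sqlglot.exp.TableSample)
        #
        # which populates bucket_numerator=1, bucket_denominator=4 and
        # bucket_field=id on the exp.TableSample node.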
3856 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 3857 size = num 3858 else: 3859 percent = num 3860 3861 if matched_l_paren: 3862 self._match_r_paren() 3863 3864 if self._match(TokenType.L_PAREN): 3865 method = self._parse_var(upper=True) 3866 seed = self._match(TokenType.COMMA) and self._parse_number() 3867 self._match_r_paren() 3868 elif self._match_texts(("SEED", "REPEATABLE")): 3869 seed = self._parse_wrapped(self._parse_number) 3870 3871 if not method and self.DEFAULT_SAMPLING_METHOD: 3872 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 3873 3874 return self.expression( 3875 exp.TableSample, 3876 expressions=expressions, 3877 method=method, 3878 bucket_numerator=bucket_numerator, 3879 bucket_denominator=bucket_denominator, 3880 bucket_field=bucket_field, 3881 percent=percent, 3882 size=size, 3883 seed=seed, 3884 ) 3885 3886 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 3887 return list(iter(self._parse_pivot, None)) or None 3888 3889 def _parse_joins(self) -> t.Iterator[exp.Join]: 3890 return iter(self._parse_join, None) 3891 3892 # https://duckdb.org/docs/sql/statements/pivot 3893 def _parse_simplified_pivot(self) -> exp.Pivot: 3894 def _parse_on() -> t.Optional[exp.Expression]: 3895 this = self._parse_bitwise() 3896 return self._parse_in(this) if self._match(TokenType.IN) else this 3897 3898 this = self._parse_table() 3899 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 3900 using = self._match(TokenType.USING) and self._parse_csv( 3901 lambda: self._parse_alias(self._parse_function()) 3902 ) 3903 group = self._parse_group() 3904 return self.expression( 3905 exp.Pivot, this=this, expressions=expressions, using=using, group=group 3906 ) 3907 3908 def _parse_pivot_in(self) -> exp.In | exp.PivotAny: 3909 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 3910 this = self._parse_select_or_expression() 3911 3912 self._match(TokenType.ALIAS) 3913 alias = self._parse_bitwise() 3914 if alias: 3915 if isinstance(alias, exp.Column) and not alias.db: 3916 alias = alias.this 3917 return self.expression(exp.PivotAlias, this=this, alias=alias) 3918 3919 return this 3920 3921 value = self._parse_column() 3922 3923 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 3924 self.raise_error("Expecting IN (") 3925 3926 if self._match(TokenType.ANY): 3927 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 3928 else: 3929 exprs = self._parse_csv(_parse_aliased_expression) 3930 3931 self._match_r_paren() 3932 return self.expression(exp.In, this=value, expressions=exprs) 3933 3934 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 3935 index = self._index 3936 include_nulls = None 3937 3938 if self._match(TokenType.PIVOT): 3939 unpivot = False 3940 elif self._match(TokenType.UNPIVOT): 3941 unpivot = True 3942 3943 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 3944 if self._match_text_seq("INCLUDE", "NULLS"): 3945 include_nulls = True 3946 elif self._match_text_seq("EXCLUDE", "NULLS"): 3947 include_nulls = False 3948 else: 3949 return None 3950 3951 expressions = [] 3952 3953 if not self._match(TokenType.L_PAREN): 3954 self._retreat(index) 3955 return None 3956 3957 if unpivot: 3958 expressions = self._parse_csv(self._parse_column) 3959 else: 3960 expressions = self._parse_csv(lambda: self._parse_alias(self._parse_function())) 3961 3962 if not expressions: 3963 self.raise_error("Failed to parse PIVOT's aggregation list") 3964 3965 if 
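        # The simplified form above follows DuckDB's statement-level syntax
        # (a sketch; assumes the duckdb dialect):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("PIVOT cities ON year USING SUM(population)", read="duckdb")
        #
        # which _parse_simplified_pivot assembles as exp.Pivot(this=cities,
        # expressions=[year], using=[SUM(population)]).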
not self._match(TokenType.FOR): 3966 self.raise_error("Expecting FOR") 3967 3968 field = self._parse_pivot_in() 3969 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 3970 self._parse_bitwise 3971 ) 3972 3973 self._match_r_paren() 3974 3975 pivot = self.expression( 3976 exp.Pivot, 3977 expressions=expressions, 3978 field=field, 3979 unpivot=unpivot, 3980 include_nulls=include_nulls, 3981 default_on_null=default_on_null, 3982 ) 3983 3984 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 3985 pivot.set("alias", self._parse_table_alias()) 3986 3987 if not unpivot: 3988 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 3989 3990 columns: t.List[exp.Expression] = [] 3991 for fld in pivot.args["field"].expressions: 3992 field_name = fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 3993 for name in names: 3994 if self.PREFIXED_PIVOT_COLUMNS: 3995 name = f"{name}_{field_name}" if name else field_name 3996 else: 3997 name = f"{field_name}_{name}" if name else field_name 3998 3999 columns.append(exp.to_identifier(name)) 4000 4001 pivot.set("columns", columns) 4002 4003 return pivot 4004 4005 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4006 return [agg.alias for agg in aggregations] 4007 4008 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4009 if not skip_where_token and not self._match(TokenType.PREWHERE): 4010 return None 4011 4012 return self.expression( 4013 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4014 ) 4015 4016 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4017 if not skip_where_token and not self._match(TokenType.WHERE): 4018 return None 4019 4020 return self.expression( 4021 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4022 ) 4023 4024 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4025 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4026 return None 4027 4028 elements: t.Dict[str, t.Any] = defaultdict(list) 4029 4030 if self._match(TokenType.ALL): 4031 elements["all"] = True 4032 elif self._match(TokenType.DISTINCT): 4033 elements["all"] = False 4034 4035 while True: 4036 index = self._index 4037 4038 elements["expressions"].extend( 4039 self._parse_csv( 4040 lambda: None 4041 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4042 else self._parse_assignment() 4043 ) 4044 ) 4045 4046 before_with_index = self._index 4047 with_prefix = self._match(TokenType.WITH) 4048 4049 if self._match(TokenType.ROLLUP): 4050 elements["rollup"].append( 4051 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4052 ) 4053 elif self._match(TokenType.CUBE): 4054 elements["cube"].append( 4055 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4056 ) 4057 elif self._match(TokenType.GROUPING_SETS): 4058 elements["grouping_sets"].append( 4059 self.expression( 4060 exp.GroupingSets, 4061 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4062 ) 4063 ) 4064 elif self._match_text_seq("TOTALS"): 4065 elements["totals"] = True # type: ignore 4066 4067 if before_with_index <= self._index <= before_with_index + 1: 4068 self._retreat(before_with_index) 4069 break 4070 4071 if index == self._index: 4072 break 4073 4074 return self.expression(exp.Group, **elements) # type: ignore 4075 4076 def _parse_cube_or_rollup(self, kind: 
t.Type[E], with_prefix: bool = False) -> E: 4077 return self.expression( 4078 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4079 ) 4080 4081 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4082 if self._match(TokenType.L_PAREN): 4083 grouping_set = self._parse_csv(self._parse_column) 4084 self._match_r_paren() 4085 return self.expression(exp.Tuple, expressions=grouping_set) 4086 4087 return self._parse_column() 4088 4089 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4090 if not skip_having_token and not self._match(TokenType.HAVING): 4091 return None 4092 return self.expression(exp.Having, this=self._parse_assignment()) 4093 4094 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4095 if not self._match(TokenType.QUALIFY): 4096 return None 4097 return self.expression(exp.Qualify, this=self._parse_assignment()) 4098 4099 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4100 if skip_start_token: 4101 start = None 4102 elif self._match(TokenType.START_WITH): 4103 start = self._parse_assignment() 4104 else: 4105 return None 4106 4107 self._match(TokenType.CONNECT_BY) 4108 nocycle = self._match_text_seq("NOCYCLE") 4109 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4110 exp.Prior, this=self._parse_bitwise() 4111 ) 4112 connect = self._parse_assignment() 4113 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4114 4115 if not start and self._match(TokenType.START_WITH): 4116 start = self._parse_assignment() 4117 4118 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4119 4120 def _parse_name_as_expression(self) -> exp.Alias: 4121 return self.expression( 4122 exp.Alias, 4123 alias=self._parse_id_var(any_token=True), 4124 this=self._match(TokenType.ALIAS) and self._parse_assignment(), 4125 ) 4126 4127 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4128 if self._match_text_seq("INTERPOLATE"): 4129 return self._parse_wrapped_csv(self._parse_name_as_expression) 4130 return None 4131 4132 def _parse_order( 4133 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4134 ) -> t.Optional[exp.Expression]: 4135 siblings = None 4136 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4137 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4138 return this 4139 4140 siblings = True 4141 4142 return self.expression( 4143 exp.Order, 4144 this=this, 4145 expressions=self._parse_csv(self._parse_ordered), 4146 siblings=siblings, 4147 ) 4148 4149 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4150 if not self._match(token): 4151 return None 4152 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4153 4154 def _parse_ordered( 4155 self, parse_method: t.Optional[t.Callable] = None 4156 ) -> t.Optional[exp.Ordered]: 4157 this = parse_method() if parse_method else self._parse_assignment() 4158 if not this: 4159 return None 4160 4161 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4162 this = exp.var("ALL") 4163 4164 asc = self._match(TokenType.ASC) 4165 desc = self._match(TokenType.DESC) or (asc and False) 4166 4167 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4168 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4169 4170 nulls_first = is_nulls_first or False 4171 explicitly_null_ordered = is_nulls_first or is_nulls_last 4172 4173 if ( 4174 not explicitly_null_ordered 4175 and ( 4176 
(not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4177 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4178 ) 4179 and self.dialect.NULL_ORDERING != "nulls_are_last" 4180 ): 4181 nulls_first = True 4182 4183 if self._match_text_seq("WITH", "FILL"): 4184 with_fill = self.expression( 4185 exp.WithFill, 4186 **{ # type: ignore 4187 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4188 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4189 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4190 "interpolate": self._parse_interpolate(), 4191 }, 4192 ) 4193 else: 4194 with_fill = None 4195 4196 return self.expression( 4197 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4198 ) 4199 4200 def _parse_limit( 4201 self, 4202 this: t.Optional[exp.Expression] = None, 4203 top: bool = False, 4204 skip_limit_token: bool = False, 4205 ) -> t.Optional[exp.Expression]: 4206 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4207 comments = self._prev_comments 4208 if top: 4209 limit_paren = self._match(TokenType.L_PAREN) 4210 expression = self._parse_term() if limit_paren else self._parse_number() 4211 4212 if limit_paren: 4213 self._match_r_paren() 4214 else: 4215 expression = self._parse_term() 4216 4217 if self._match(TokenType.COMMA): 4218 offset = expression 4219 expression = self._parse_term() 4220 else: 4221 offset = None 4222 4223 limit_exp = self.expression( 4224 exp.Limit, 4225 this=this, 4226 expression=expression, 4227 offset=offset, 4228 comments=comments, 4229 expressions=self._parse_limit_by(), 4230 ) 4231 4232 return limit_exp 4233 4234 if self._match(TokenType.FETCH): 4235 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4236 direction = self._prev.text.upper() if direction else "FIRST" 4237 4238 count = self._parse_field(tokens=self.FETCH_TOKENS) 4239 percent = self._match(TokenType.PERCENT) 4240 4241 self._match_set((TokenType.ROW, TokenType.ROWS)) 4242 4243 only = self._match_text_seq("ONLY") 4244 with_ties = self._match_text_seq("WITH", "TIES") 4245 4246 if only and with_ties: 4247 self.raise_error("Cannot specify both ONLY and WITH TIES in FETCH clause") 4248 4249 return self.expression( 4250 exp.Fetch, 4251 direction=direction, 4252 count=count, 4253 percent=percent, 4254 with_ties=with_ties, 4255 ) 4256 4257 return this 4258 4259 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4260 if not self._match(TokenType.OFFSET): 4261 return this 4262 4263 count = self._parse_term() 4264 self._match_set((TokenType.ROW, TokenType.ROWS)) 4265 4266 return self.expression( 4267 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4268 ) 4269 4270 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4271 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4272 4273 def _parse_locks(self) -> t.List[exp.Lock]: 4274 locks = [] 4275 while True: 4276 if self._match_text_seq("FOR", "UPDATE"): 4277 update = True 4278 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4279 "LOCK", "IN", "SHARE", "MODE" 4280 ): 4281 update = False 4282 else: 4283 break 4284 4285 expressions = None 4286 if self._match_text_seq("OF"): 4287 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4288 4289 wait: t.Optional[bool | exp.Expression] = None 4290 if self._match_text_seq("NOWAIT"): 4291 wait = True 4292 elif self._match_text_seq("WAIT"): 4293 wait = 
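        # e.g. the COMMA branch above implements MySQL's "LIMIT offset, count"
        # (a sketch, public API only):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT x FROM t LIMIT 5, 10", read="mysql", write="postgres")
        #   ['SELECT x FROM t LIMIT 10 OFFSET 5']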
self._parse_primary() 4294 elif self._match_text_seq("SKIP", "LOCKED"): 4295 wait = False 4296 4297 locks.append( 4298 self.expression(exp.Lock, update=update, expressions=expressions, wait=wait) 4299 ) 4300 4301 return locks 4302 4303 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4304 while this and self._match_set(self.SET_OPERATIONS): 4305 token_type = self._prev.token_type 4306 4307 if token_type == TokenType.UNION: 4308 operation: t.Type[exp.SetOperation] = exp.Union 4309 elif token_type == TokenType.EXCEPT: 4310 operation = exp.Except 4311 else: 4312 operation = exp.Intersect 4313 4314 comments = self._prev.comments 4315 4316 if self._match(TokenType.DISTINCT): 4317 distinct: t.Optional[bool] = True 4318 elif self._match(TokenType.ALL): 4319 distinct = False 4320 else: 4321 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4322 if distinct is None: 4323 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4324 4325 by_name = self._match_text_seq("BY", "NAME") 4326 expression = self._parse_select(nested=True, parse_set_operation=False) 4327 4328 this = self.expression( 4329 operation, 4330 comments=comments, 4331 this=this, 4332 distinct=distinct, 4333 by_name=by_name, 4334 expression=expression, 4335 ) 4336 4337 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4338 expression = this.expression 4339 4340 if expression: 4341 for arg in self.SET_OP_MODIFIERS: 4342 expr = expression.args.get(arg) 4343 if expr: 4344 this.set(arg, expr.pop()) 4345 4346 return this 4347 4348 def _parse_expression(self) -> t.Optional[exp.Expression]: 4349 return self._parse_alias(self._parse_assignment()) 4350 4351 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4352 this = self._parse_disjunction() 4353 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4354 # This allows us to parse <non-identifier token> := <expr> 4355 this = exp.column( 4356 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4357 ) 4358 4359 while self._match_set(self.ASSIGNMENT): 4360 if isinstance(this, exp.Column) and len(this.parts) == 1: 4361 this = this.this 4362 4363 this = self.expression( 4364 self.ASSIGNMENT[self._prev.token_type], 4365 this=this, 4366 comments=self._prev_comments, 4367 expression=self._parse_assignment(), 4368 ) 4369 4370 return this 4371 4372 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4373 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4374 4375 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4376 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4377 4378 def _parse_equality(self) -> t.Optional[exp.Expression]: 4379 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4380 4381 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4382 return self._parse_tokens(self._parse_range, self.COMPARISON) 4383 4384 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4385 this = this or self._parse_bitwise() 4386 negate = self._match(TokenType.NOT) 4387 4388 if self._match_set(self.RANGE_PARSERS): 4389 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4390 if not expression: 4391 return this 4392 4393 this = expression 4394 elif self._match(TokenType.ISNULL): 4395 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4396 4397 # Postgres supports ISNULL and NOTNULL for conditions. 
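        # e.g. both Postgres shorthands normalize to IS NULL (a sketch, public
        # API only):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT * FROM t WHERE x ISNULL", read="postgres").sql()
        #   'SELECT * FROM t WHERE x IS NULL'
        #
        # NOTNULL likewise becomes NOT (x IS NULL) via the branch below.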
4398 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4399 if self._match(TokenType.NOTNULL): 4400 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4401 this = self.expression(exp.Not, this=this) 4402 4403 if negate: 4404 this = self._negate_range(this) 4405 4406 if self._match(TokenType.IS): 4407 this = self._parse_is(this) 4408 4409 return this 4410 4411 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4412 if not this: 4413 return this 4414 4415 return self.expression(exp.Not, this=this) 4416 4417 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4418 index = self._index - 1 4419 negate = self._match(TokenType.NOT) 4420 4421 if self._match_text_seq("DISTINCT", "FROM"): 4422 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4423 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4424 4425 if self._match(TokenType.JSON): 4426 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4427 4428 if self._match_text_seq("WITH"): 4429 _with = True 4430 elif self._match_text_seq("WITHOUT"): 4431 _with = False 4432 else: 4433 _with = None 4434 4435 unique = self._match(TokenType.UNIQUE) 4436 self._match_text_seq("KEYS") 4437 expression: t.Optional[exp.Expression] = self.expression( 4438 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4439 ) 4440 else: 4441 expression = self._parse_primary() or self._parse_null() 4442 if not expression: 4443 self._retreat(index) 4444 return None 4445 4446 this = self.expression(exp.Is, this=this, expression=expression) 4447 return self.expression(exp.Not, this=this) if negate else this 4448 4449 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4450 unnest = self._parse_unnest(with_alias=False) 4451 if unnest: 4452 this = self.expression(exp.In, this=this, unnest=unnest) 4453 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4454 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4455 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4456 4457 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4458 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4459 else: 4460 this = self.expression(exp.In, this=this, expressions=expressions) 4461 4462 if matched_l_paren: 4463 self._match_r_paren(this) 4464 elif not self._match(TokenType.R_BRACKET, expression=this): 4465 self.raise_error("Expecting ]") 4466 else: 4467 this = self.expression(exp.In, this=this, field=self._parse_field()) 4468 4469 return this 4470 4471 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 4472 low = self._parse_bitwise() 4473 self._match(TokenType.AND) 4474 high = self._parse_bitwise() 4475 return self.expression(exp.Between, this=this, low=low, high=high) 4476 4477 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4478 if not self._match(TokenType.ESCAPE): 4479 return this 4480 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 4481 4482 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 4483 index = self._index 4484 4485 if not self._match(TokenType.INTERVAL) and match_interval: 4486 return None 4487 4488 if self._match(TokenType.STRING, advance=False): 4489 this = self._parse_primary() 4490 else: 4491 this = self._parse_term() 4492 4493 if not this 
or ( 4494 isinstance(this, exp.Column) 4495 and not this.table 4496 and not this.this.quoted 4497 and this.name.upper() == "IS" 4498 ): 4499 self._retreat(index) 4500 return None 4501 4502 unit = self._parse_function() or ( 4503 not self._match(TokenType.ALIAS, advance=False) 4504 and self._parse_var(any_token=True, upper=True) 4505 ) 4506 4507 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 4508 # each INTERVAL expression into this canonical form so it's easy to transpile 4509 if this and this.is_number: 4510 this = exp.Literal.string(this.to_py()) 4511 elif this and this.is_string: 4512 parts = exp.INTERVAL_STRING_RE.findall(this.name) 4513 if len(parts) == 1: 4514 if unit: 4515 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 4516 self._retreat(self._index - 1) 4517 4518 this = exp.Literal.string(parts[0][0]) 4519 unit = self.expression(exp.Var, this=parts[0][1].upper()) 4520 4521 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 4522 unit = self.expression( 4523 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 4524 ) 4525 4526 interval = self.expression(exp.Interval, this=this, unit=unit) 4527 4528 index = self._index 4529 self._match(TokenType.PLUS) 4530 4531 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 4532 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 4533 return self.expression( 4534 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 4535 ) 4536 4537 self._retreat(index) 4538 return interval 4539 4540 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 4541 this = self._parse_term() 4542 4543 while True: 4544 if self._match_set(self.BITWISE): 4545 this = self.expression( 4546 self.BITWISE[self._prev.token_type], 4547 this=this, 4548 expression=self._parse_term(), 4549 ) 4550 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 4551 this = self.expression( 4552 exp.DPipe, 4553 this=this, 4554 expression=self._parse_term(), 4555 safe=not self.dialect.STRICT_STRING_CONCAT, 4556 ) 4557 elif self._match(TokenType.DQMARK): 4558 this = self.expression( 4559 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 4560 ) 4561 elif self._match_pair(TokenType.LT, TokenType.LT): 4562 this = self.expression( 4563 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 4564 ) 4565 elif self._match_pair(TokenType.GT, TokenType.GT): 4566 this = self.expression( 4567 exp.BitwiseRightShift, this=this, expression=self._parse_term() 4568 ) 4569 else: 4570 break 4571 4572 return this 4573 4574 def _parse_term(self) -> t.Optional[exp.Expression]: 4575 this = self._parse_factor() 4576 4577 while self._match_set(self.TERM): 4578 klass = self.TERM[self._prev.token_type] 4579 comments = self._prev_comments 4580 expression = self._parse_factor() 4581 4582 this = self.expression(klass, this=this, comments=comments, expression=expression) 4583 4584 if isinstance(this, exp.Collate): 4585 expr = this.expression 4586 4587 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 4588 # fallback to Identifier / Var 4589 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 4590 ident = expr.this 4591 if isinstance(ident, exp.Identifier): 4592 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 4593 4594 return this 4595 4596 def _parse_factor(self) -> t.Optional[exp.Expression]: 4597 parse_method = self._parse_exponent if 
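        # e.g. the canonicalization above means all of these end up as
        # Interval(this='5', unit=Var(this=DAY)) (a sketch, public API only):
        #
        #   >>> import sqlglot
        #   >>> sqlglot.parse_one("SELECT INTERVAL 5 DAY")
        #   >>> sqlglot.parse_one("SELECT INTERVAL '5' DAY")
        #   >>> sqlglot.parse_one("SELECT INTERVAL '5 DAY'")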
self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # _parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>), e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
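            # A sketch of the two outcomes described above (public API only):
            #
            #   >>> import sqlglot
            #   >>> sqlglot.parse_one("SELECT STRUCT<a INT>(1)", read="bigquery")
            #   >>> sqlglot.parse_one("SELECT CAST(x AS DECIMAL(38, 0)) FROM t")
            #
            # The first hits the inline-constructor path and comes back as a Cast;
            # the second consumes the precision/scale tokens as the DataType's
            # expressions arg.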
4666 if data_type.expressions and index2 - index > 1: 4667 self._retreat(index2) 4668 return self._parse_column_ops(data_type) 4669 4670 self._retreat(index) 4671 4672 if fallback_to_identifier: 4673 return self._parse_id_var() 4674 4675 this = self._parse_column() 4676 return this and self._parse_column_ops(this) 4677 4678 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 4679 this = self._parse_type() 4680 if not this: 4681 return None 4682 4683 if isinstance(this, exp.Column) and not this.table: 4684 this = exp.var(this.name.upper()) 4685 4686 return self.expression( 4687 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 4688 ) 4689 4690 def _parse_types( 4691 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 4692 ) -> t.Optional[exp.Expression]: 4693 index = self._index 4694 4695 this: t.Optional[exp.Expression] = None 4696 prefix = self._match_text_seq("SYSUDTLIB", ".") 4697 4698 if not self._match_set(self.TYPE_TOKENS): 4699 identifier = allow_identifiers and self._parse_id_var( 4700 any_token=False, tokens=(TokenType.VAR,) 4701 ) 4702 if isinstance(identifier, exp.Identifier): 4703 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 4704 4705 if len(tokens) != 1: 4706 self.raise_error("Unexpected identifier", self._prev) 4707 4708 if tokens[0].token_type in self.TYPE_TOKENS: 4709 self._prev = tokens[0] 4710 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 4711 type_name = identifier.name 4712 4713 while self._match(TokenType.DOT): 4714 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 4715 4716 this = exp.DataType.build(type_name, udt=True) 4717 else: 4718 self._retreat(self._index - 1) 4719 return None 4720 else: 4721 return None 4722 4723 type_token = self._prev.token_type 4724 4725 if type_token == TokenType.PSEUDO_TYPE: 4726 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 4727 4728 if type_token == TokenType.OBJECT_IDENTIFIER: 4729 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 4730 4731 # https://materialize.com/docs/sql/types/map/ 4732 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 4733 key_type = self._parse_types( 4734 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4735 ) 4736 if not self._match(TokenType.FARROW): 4737 self._retreat(index) 4738 return None 4739 4740 value_type = self._parse_types( 4741 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4742 ) 4743 if not self._match(TokenType.R_BRACKET): 4744 self._retreat(index) 4745 return None 4746 4747 return exp.DataType( 4748 this=exp.DataType.Type.MAP, 4749 expressions=[key_type, value_type], 4750 nested=True, 4751 prefix=prefix, 4752 ) 4753 4754 nested = type_token in self.NESTED_TYPE_TOKENS 4755 is_struct = type_token in self.STRUCT_TYPE_TOKENS 4756 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 4757 expressions = None 4758 maybe_func = False 4759 4760 if self._match(TokenType.L_PAREN): 4761 if is_struct: 4762 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4763 elif nested: 4764 expressions = self._parse_csv( 4765 lambda: self._parse_types( 4766 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4767 ) 4768 ) 4769 if type_token == TokenType.NULLABLE and len(expressions) == 1: 4770 this = expressions[0] 4771 this.set("nullable", True) 4772 self._match_r_paren() 4773 return this 4774 elif type_token in self.ENUM_TYPE_TOKENS: 4775 
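            # e.g. the bracketed MAP syntax above follows Materialize (a sketch;
            # assumes the materialize dialect is available):
            #
            #   >>> import sqlglot
            #   >>> sqlglot.parse_one("SELECT CAST(m AS MAP[TEXT => INT])", read="materialize")
            #
            # which builds DataType(this=Type.MAP, expressions=[TEXT, INT], nested=True).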
expressions = self._parse_csv(self._parse_equality) 4776 elif is_aggregate: 4777 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 4778 any_token=False, tokens=(TokenType.VAR,) 4779 ) 4780 if not func_or_ident or not self._match(TokenType.COMMA): 4781 return None 4782 expressions = self._parse_csv( 4783 lambda: self._parse_types( 4784 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4785 ) 4786 ) 4787 expressions.insert(0, func_or_ident) 4788 else: 4789 expressions = self._parse_csv(self._parse_type_size) 4790 4791 # https://docs.snowflake.com/en/sql-reference/data-types-vector 4792 if type_token == TokenType.VECTOR and len(expressions) == 2: 4793 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 4794 4795 if not expressions or not self._match(TokenType.R_PAREN): 4796 self._retreat(index) 4797 return None 4798 4799 maybe_func = True 4800 4801 values: t.Optional[t.List[exp.Expression]] = None 4802 4803 if nested and self._match(TokenType.LT): 4804 if is_struct: 4805 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 4806 else: 4807 expressions = self._parse_csv( 4808 lambda: self._parse_types( 4809 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 4810 ) 4811 ) 4812 4813 if not self._match(TokenType.GT): 4814 self.raise_error("Expecting >") 4815 4816 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 4817 values = self._parse_csv(self._parse_assignment) 4818 if not values and is_struct: 4819 values = None 4820 self._retreat(self._index - 1) 4821 else: 4822 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 4823 4824 if type_token in self.TIMESTAMPS: 4825 if self._match_text_seq("WITH", "TIME", "ZONE"): 4826 maybe_func = False 4827 tz_type = ( 4828 exp.DataType.Type.TIMETZ 4829 if type_token in self.TIMES 4830 else exp.DataType.Type.TIMESTAMPTZ 4831 ) 4832 this = exp.DataType(this=tz_type, expressions=expressions) 4833 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 4834 maybe_func = False 4835 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 4836 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 4837 maybe_func = False 4838 elif type_token == TokenType.INTERVAL: 4839 unit = self._parse_var(upper=True) 4840 if unit: 4841 if self._match_text_seq("TO"): 4842 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 4843 4844 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 4845 else: 4846 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 4847 4848 if maybe_func and check_func: 4849 index2 = self._index 4850 peek = self._parse_string() 4851 4852 if not peek: 4853 self._retreat(index) 4854 return None 4855 4856 self._retreat(index2) 4857 4858 if not this: 4859 if self._match_text_seq("UNSIGNED"): 4860 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 4861 if not unsigned_type_token: 4862 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 4863 4864 type_token = unsigned_type_token or type_token 4865 4866 this = exp.DataType( 4867 this=exp.DataType.Type[type_token.value], 4868 expressions=expressions, 4869 nested=nested, 4870 prefix=prefix, 4871 ) 4872 4873 # Empty arrays/structs are allowed 4874 if values is not None: 4875 cls = exp.Struct if is_struct else exp.Array 4876 this = exp.cast(cls(expressions=values), this, copy=False) 4877 4878 elif expressions: 4879 this.set("expressions", 
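# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# "WITH TIME ZONE" upgrades the base token to its TZ-aware counterpart, and
# STRUCT<...> recurses into _parse_struct_types. Sample SQL and the expected
# renderings are assumptions:
from sqlglot import exp, parse_one

to = parse_one("CAST(x AS TIMESTAMP WITH TIME ZONE)").find(exp.Cast).to
print(to.this)  # Type.TIMESTAMPTZ

to = parse_one("CAST(x AS STRUCT<a INT, b TEXT>)", read="bigquery").find(exp.Cast).to
print(to.sql("bigquery"))  # roughly STRUCT<a INT64, b STRING>
# --- end sketch ----------------------------------------------------------------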
expressions) 4880 4881 # https://materialize.com/docs/sql/types/list/#type-name 4882 while self._match(TokenType.LIST): 4883 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 4884 4885 index = self._index 4886 4887 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 4888 matched_array = self._match(TokenType.ARRAY) 4889 4890 while self._curr: 4891 datatype_token = self._prev.token_type 4892 matched_l_bracket = self._match(TokenType.L_BRACKET) 4893 if not matched_l_bracket and not matched_array: 4894 break 4895 4896 matched_array = False 4897 values = self._parse_csv(self._parse_assignment) or None 4898 if ( 4899 values 4900 and not schema 4901 and ( 4902 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 4903 ) 4904 ): 4905 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 4906 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 4907 self._retreat(index) 4908 break 4909 4910 this = exp.DataType( 4911 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 4912 ) 4913 self._match(TokenType.R_BRACKET) 4914 4915 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 4916 converter = self.TYPE_CONVERTERS.get(this.this) 4917 if converter: 4918 this = converter(t.cast(exp.DataType, this)) 4919 4920 return this 4921 4922 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 4923 index = self._index 4924 4925 if ( 4926 self._curr 4927 and self._next 4928 and self._curr.token_type in self.TYPE_TOKENS 4929 and self._next.token_type in self.TYPE_TOKENS 4930 ): 4931 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 4932 # type token. 
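# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# The retreat above is what separates fixed-size array *types* from array
# *literals*. DuckDB examples, chosen as an assumption:
from sqlglot import exp, parse_one

to = parse_one("CAST(x AS INT[3])", read="duckdb").find(exp.Cast).to
print(to.this, to.args.get("values"))  # Type.ARRAY, with the size 3 attached

arr = parse_one("SELECT ARRAY[1, 2]", read="duckdb").find(exp.Array)
print(type(arr).__name__)  # Array -- a literal, not a data type
# --- end sketch ----------------------------------------------------------------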
Without this, the list will be parsed as a type and we'll eventually crash 4933 this = self._parse_id_var() 4934 else: 4935 this = ( 4936 self._parse_type(parse_interval=False, fallback_to_identifier=True) 4937 or self._parse_id_var() 4938 ) 4939 4940 self._match(TokenType.COLON) 4941 4942 if ( 4943 type_required 4944 and not isinstance(this, exp.DataType) 4945 and not self._match_set(self.TYPE_TOKENS, advance=False) 4946 ): 4947 self._retreat(index) 4948 return self._parse_types() 4949 4950 return self._parse_column_def(this) 4951 4952 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4953 if not self._match_text_seq("AT", "TIME", "ZONE"): 4954 return this 4955 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 4956 4957 def _parse_column(self) -> t.Optional[exp.Expression]: 4958 this = self._parse_column_reference() 4959 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 4960 4961 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 4962 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 4963 4964 return column 4965 4966 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 4967 this = self._parse_field() 4968 if ( 4969 not this 4970 and self._match(TokenType.VALUES, advance=False) 4971 and self.VALUES_FOLLOWED_BY_PAREN 4972 and (not self._next or self._next.token_type != TokenType.L_PAREN) 4973 ): 4974 this = self._parse_id_var() 4975 4976 if isinstance(this, exp.Identifier): 4977 # We bubble up comments from the Identifier to the Column 4978 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 4979 4980 return this 4981 4982 def _parse_colon_as_variant_extract( 4983 self, this: t.Optional[exp.Expression] 4984 ) -> t.Optional[exp.Expression]: 4985 casts = [] 4986 json_path = [] 4987 escape = None 4988 4989 while self._match(TokenType.COLON): 4990 start_index = self._index 4991 4992 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 4993 path = self._parse_column_ops( 4994 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 4995 ) 4996 4997 # The cast :: operator has a lower precedence than the extraction operator :, so 4998 # we rearrange the AST appropriately to avoid casting the JSON path 4999 while isinstance(path, exp.Cast): 5000 casts.append(path.to) 5001 path = path.this 5002 5003 if casts: 5004 dcolon_offset = next( 5005 i 5006 for i, t in enumerate(self._tokens[start_index:]) 5007 if t.token_type == TokenType.DCOLON 5008 ) 5009 end_token = self._tokens[start_index + dcolon_offset - 1] 5010 else: 5011 end_token = self._prev 5012 5013 if path: 5014 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5015 # it'll roundtrip to a string literal in GET_PATH 5016 if isinstance(path, exp.Identifier) and path.quoted: 5017 escape = True 5018 5019 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5020 5021 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5022 # Databricks transforms it back to the colon/dot notation 5023 if json_path: 5024 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5025 5026 if json_path_expr: 5027 json_path_expr.set("escape", escape) 5028 5029 this = self.expression( 5030 exp.JSONExtract, 5031 this=this, 5032 expression=json_path_expr, 5033 variant_extract=True, 5034 ) 5035 5036 while casts: 5037 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5038 5039 return this 5040 5041 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5042 return self._parse_types() 5043 5044 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5045 this = self._parse_bracket(this) 5046 5047 while self._match_set(self.COLUMN_OPERATORS): 5048 op_token = self._prev.token_type 5049 op = self.COLUMN_OPERATORS.get(op_token) 5050 5051 if op_token == TokenType.DCOLON: 5052 field = self._parse_dcolon() 5053 if not field: 5054 self.raise_error("Expected type") 5055 elif op and self._curr: 5056 field = self._parse_column_reference() or self._parse_bracket() 5057 else: 5058 field = self._parse_field(any_token=True, anonymous_func=True) 5059 5060 if isinstance(field, exp.Func) and this: 5061 # bigquery allows function calls like x.y.count(...) 5062 # SAFE.SUBSTR(...) 5063 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5064 this = exp.replace_tree( 5065 this, 5066 lambda n: ( 5067 self.expression(exp.Dot, this=n.args.get("table"), expression=n.this) 5068 if n.table 5069 else n.this 5070 ) 5071 if isinstance(n, exp.Column) 5072 else n, 5073 ) 5074 5075 if op: 5076 this = op(self, this, field) 5077 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5078 this = self.expression( 5079 exp.Column, 5080 this=field, 5081 table=this.this, 5082 db=this.args.get("table"), 5083 catalog=this.args.get("db"), 5084 ) 5085 else: 5086 this = self.expression(exp.Dot, this=this, expression=field) 5087 5088 this = self._parse_bracket(this) 5089 5090 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5091 5092 def _parse_primary(self) -> t.Optional[exp.Expression]: 5093 if self._match_set(self.PRIMARY_PARSERS): 5094 token_type = self._prev.token_type 5095 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5096 5097 if token_type == TokenType.STRING: 5098 expressions = [primary] 5099 while self._match(TokenType.STRING): 5100 expressions.append(exp.Literal.string(self._prev.text)) 5101 5102 if len(expressions) > 1: 5103 return self.expression(exp.Concat, expressions=expressions) 5104 5105 return primary 5106 5107 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5108 return exp.Literal.number(f"0.{self._prev.text}") 5109 5110 if self._match(TokenType.L_PAREN): 5111 comments = self._prev_comments 5112 query = self._parse_select() 5113 5114 if query: 5115 expressions = [query] 5116 else: 5117 expressions = self._parse_expressions() 5118 5119 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5120 5121 if not this and self._match(TokenType.R_PAREN, advance=False): 5122 this = self.expression(exp.Tuple) 
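# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# The rearrangement above re-applies the :: cast *after* the variant
# extraction, so `v:a.b::INT` casts the extracted value. The exact output
# depends on the sqlglot version; recent ones render Snowflake extraction
# via GET_PATH:
from sqlglot import parse_one

q = parse_one("SELECT v:a.b::INT FROM t", read="snowflake")
print(q.sql("snowflake"))  # roughly SELECT CAST(GET_PATH(v, 'a.b') AS INT) FROM t
# --- end sketch ----------------------------------------------------------------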
5123 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5124 this = self._parse_subquery(this=this, parse_alias=False) 5125 elif isinstance(this, exp.Subquery): 5126 this = self._parse_subquery( 5127 this=self._parse_set_operations(this), parse_alias=False 5128 ) 5129 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5130 this = self.expression(exp.Tuple, expressions=expressions) 5131 else: 5132 this = self.expression(exp.Paren, this=this) 5133 5134 if this: 5135 this.add_comments(comments) 5136 5137 self._match_r_paren(expression=this) 5138 return this 5139 5140 return None 5141 5142 def _parse_field( 5143 self, 5144 any_token: bool = False, 5145 tokens: t.Optional[t.Collection[TokenType]] = None, 5146 anonymous_func: bool = False, 5147 ) -> t.Optional[exp.Expression]: 5148 if anonymous_func: 5149 field = ( 5150 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5151 or self._parse_primary() 5152 ) 5153 else: 5154 field = self._parse_primary() or self._parse_function( 5155 anonymous=anonymous_func, any_token=any_token 5156 ) 5157 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5158 5159 def _parse_function( 5160 self, 5161 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5162 anonymous: bool = False, 5163 optional_parens: bool = True, 5164 any_token: bool = False, 5165 ) -> t.Optional[exp.Expression]: 5166 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5167 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5168 fn_syntax = False 5169 if ( 5170 self._match(TokenType.L_BRACE, advance=False) 5171 and self._next 5172 and self._next.text.upper() == "FN" 5173 ): 5174 self._advance(2) 5175 fn_syntax = True 5176 5177 func = self._parse_function_call( 5178 functions=functions, 5179 anonymous=anonymous, 5180 optional_parens=optional_parens, 5181 any_token=any_token, 5182 ) 5183 5184 if fn_syntax: 5185 self._match(TokenType.R_BRACE) 5186 5187 return func 5188 5189 def _parse_function_call( 5190 self, 5191 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5192 anonymous: bool = False, 5193 optional_parens: bool = True, 5194 any_token: bool = False, 5195 ) -> t.Optional[exp.Expression]: 5196 if not self._curr: 5197 return None 5198 5199 comments = self._curr.comments 5200 token_type = self._curr.token_type 5201 this = self._curr.text 5202 upper = this.upper() 5203 5204 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5205 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5206 self._advance() 5207 return self._parse_window(parser(self)) 5208 5209 if not self._next or self._next.token_type != TokenType.L_PAREN: 5210 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5211 self._advance() 5212 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5213 5214 return None 5215 5216 if any_token: 5217 if token_type in self.RESERVED_TOKENS: 5218 return None 5219 elif token_type not in self.FUNC_TOKENS: 5220 return None 5221 5222 self._advance(2) 5223 5224 parser = self.FUNCTION_PARSERS.get(upper) 5225 if parser and not anonymous: 5226 this = parser(self) 5227 else: 5228 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5229 5230 if subquery_predicate and self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5231 this = self.expression(subquery_predicate, this=self._parse_select()) 5232 self._match_r_paren() 5233 return this 5234 5235 if functions is None: 5236 functions = self.FUNCTIONS 5237 5238 function = 
functions.get(upper) 5239 5240 alias = upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5241 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5242 5243 if alias: 5244 args = self._kv_to_prop_eq(args) 5245 5246 if function and not anonymous: 5247 if "dialect" in function.__code__.co_varnames: 5248 func = function(args, dialect=self.dialect) 5249 else: 5250 func = function(args) 5251 5252 func = self.validate_expression(func, args) 5253 if not self.dialect.NORMALIZE_FUNCTIONS: 5254 func.meta["name"] = this 5255 5256 this = func 5257 else: 5258 if token_type == TokenType.IDENTIFIER: 5259 this = exp.Identifier(this=this, quoted=True) 5260 this = self.expression(exp.Anonymous, this=this, expressions=args) 5261 5262 if isinstance(this, exp.Expression): 5263 this.add_comments(comments) 5264 5265 self._match_r_paren(this) 5266 return self._parse_window(this) 5267 5268 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5269 return expression 5270 5271 def _kv_to_prop_eq(self, expressions: t.List[exp.Expression]) -> t.List[exp.Expression]: 5272 transformed = [] 5273 5274 for index, e in enumerate(expressions): 5275 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5276 if isinstance(e, exp.Alias): 5277 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5278 5279 if not isinstance(e, exp.PropertyEQ): 5280 e = self.expression( 5281 exp.PropertyEQ, this=exp.to_identifier(e.this.name), expression=e.expression 5282 ) 5283 5284 if isinstance(e.this, exp.Column): 5285 e.this.replace(e.this.this) 5286 else: 5287 e = self._to_prop_eq(e, index) 5288 5289 transformed.append(e) 5290 5291 return transformed 5292 5293 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5294 return self._parse_column_def(self._parse_id_var()) 5295 5296 def _parse_user_defined_function( 5297 self, kind: t.Optional[TokenType] = None 5298 ) -> t.Optional[exp.Expression]: 5299 this = self._parse_id_var() 5300 5301 while self._match(TokenType.DOT): 5302 this = self.expression(exp.Dot, this=this, expression=self._parse_id_var()) 5303 5304 if not self._match(TokenType.L_PAREN): 5305 return this 5306 5307 expressions = self._parse_csv(self._parse_function_parameter) 5308 self._match_r_paren() 5309 return self.expression( 5310 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5311 ) 5312 5313 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5314 literal = self._parse_primary() 5315 if literal: 5316 return self.expression(exp.Introducer, this=token.text, expression=literal) 5317 5318 return self.expression(exp.Identifier, this=token.text) 5319 5320 def _parse_session_parameter(self) -> exp.SessionParameter: 5321 kind = None 5322 this = self._parse_id_var() or self._parse_primary() 5323 5324 if this and self._match(TokenType.DOT): 5325 kind = this.name 5326 this = self._parse_var() or self._parse_primary() 5327 5328 return self.expression(exp.SessionParameter, this=this, kind=kind) 5329 5330 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5331 return self._parse_id_var() 5332 5333 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5334 index = self._index 5335 5336 if self._match(TokenType.L_PAREN): 5337 expressions = t.cast( 5338 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5339 ) 5340 5341 if not self._match(TokenType.R_PAREN): 5342 self._retreat(index) 5343 else: 5344 expressions = [self._parse_lambda_arg()] 5345 5346 if 
self._match_set(self.LAMBDAS): 5347 return self.LAMBDAS[self._prev.token_type](self, expressions) 5348 5349 self._retreat(index) 5350 5351 this: t.Optional[exp.Expression] 5352 5353 if self._match(TokenType.DISTINCT): 5354 this = self.expression( 5355 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5356 ) 5357 else: 5358 this = self._parse_select_or_expression(alias=alias) 5359 5360 return self._parse_limit( 5361 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5362 ) 5363 5364 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5365 index = self._index 5366 if not self._match(TokenType.L_PAREN): 5367 return this 5368 5369 # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>), 5370 # expr can be of both types 5371 if self._match_set(self.SELECT_START_TOKENS): 5372 self._retreat(index) 5373 return this 5374 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5375 self._match_r_paren() 5376 return self.expression(exp.Schema, this=this, expressions=args) 5377 5378 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5379 return self._parse_column_def(self._parse_field(any_token=True)) 5380 5381 def _parse_column_def(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5382 # column defs are not really columns, they're identifiers 5383 if isinstance(this, exp.Column): 5384 this = this.this 5385 5386 kind = self._parse_types(schema=True) 5387 5388 if self._match_text_seq("FOR", "ORDINALITY"): 5389 return self.expression(exp.ColumnDef, this=this, ordinality=True) 5390 5391 constraints: t.List[exp.Expression] = [] 5392 5393 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 5394 ("ALIAS", "MATERIALIZED") 5395 ): 5396 persisted = self._prev.text.upper() == "MATERIALIZED" 5397 constraint_kind = exp.ComputedColumnConstraint( 5398 this=self._parse_assignment(), 5399 persisted=persisted or self._match_text_seq("PERSISTED"), 5400 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 5401 ) 5402 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 5403 elif kind and self._match_pair(TokenType.ALIAS, TokenType.L_PAREN, advance=False): 5404 self._match(TokenType.ALIAS) 5405 constraints.append( 5406 self.expression( 5407 exp.ColumnConstraint, 5408 kind=exp.TransformColumnConstraint(this=self._parse_field()), 5409 ) 5410 ) 5411 5412 while True: 5413 constraint = self._parse_column_constraint() 5414 if not constraint: 5415 break 5416 constraints.append(constraint) 5417 5418 if not kind and not constraints: 5419 return this 5420 5421 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 5422 5423 def _parse_auto_increment( 5424 self, 5425 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 5426 start = None 5427 increment = None 5428 5429 if self._match(TokenType.L_PAREN, advance=False): 5430 args = self._parse_wrapped_csv(self._parse_bitwise) 5431 start = seq_get(args, 0) 5432 increment = seq_get(args, 1) 5433 elif self._match_text_seq("START"): 5434 start = self._parse_bitwise() 5435 self._match_text_seq("INCREMENT") 5436 increment = self._parse_bitwise() 5437 5438 if start and increment: 5439 return exp.GeneratedAsIdentityColumnConstraint(start=start, increment=increment) 5440 5441 return exp.AutoIncrementColumnConstraint() 5442 5443 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 5444 if not 
self._match_text_seq("REFRESH"): 5445 self._retreat(self._index - 1) 5446 return None 5447 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 5448 5449 def _parse_compress(self) -> exp.CompressColumnConstraint: 5450 if self._match(TokenType.L_PAREN, advance=False): 5451 return self.expression( 5452 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 5453 ) 5454 5455 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 5456 5457 def _parse_generated_as_identity( 5458 self, 5459 ) -> ( 5460 exp.GeneratedAsIdentityColumnConstraint 5461 | exp.ComputedColumnConstraint 5462 | exp.GeneratedAsRowColumnConstraint 5463 ): 5464 if self._match_text_seq("BY", "DEFAULT"): 5465 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 5466 this = self.expression( 5467 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 5468 ) 5469 else: 5470 self._match_text_seq("ALWAYS") 5471 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 5472 5473 self._match(TokenType.ALIAS) 5474 5475 if self._match_text_seq("ROW"): 5476 start = self._match_text_seq("START") 5477 if not start: 5478 self._match(TokenType.END) 5479 hidden = self._match_text_seq("HIDDEN") 5480 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 5481 5482 identity = self._match_text_seq("IDENTITY") 5483 5484 if self._match(TokenType.L_PAREN): 5485 if self._match(TokenType.START_WITH): 5486 this.set("start", self._parse_bitwise()) 5487 if self._match_text_seq("INCREMENT", "BY"): 5488 this.set("increment", self._parse_bitwise()) 5489 if self._match_text_seq("MINVALUE"): 5490 this.set("minvalue", self._parse_bitwise()) 5491 if self._match_text_seq("MAXVALUE"): 5492 this.set("maxvalue", self._parse_bitwise()) 5493 5494 if self._match_text_seq("CYCLE"): 5495 this.set("cycle", True) 5496 elif self._match_text_seq("NO", "CYCLE"): 5497 this.set("cycle", False) 5498 5499 if not identity: 5500 this.set("expression", self._parse_range()) 5501 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 5502 args = self._parse_csv(self._parse_bitwise) 5503 this.set("start", seq_get(args, 0)) 5504 this.set("increment", seq_get(args, 1)) 5505 5506 self._match_r_paren() 5507 5508 return this 5509 5510 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 5511 self._match_text_seq("LENGTH") 5512 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 5513 5514 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 5515 if self._match_text_seq("NULL"): 5516 return self.expression(exp.NotNullColumnConstraint) 5517 if self._match_text_seq("CASESPECIFIC"): 5518 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 5519 if self._match_text_seq("FOR", "REPLICATION"): 5520 return self.expression(exp.NotForReplicationColumnConstraint) 5521 5522 # Unconsume the `NOT` token 5523 self._retreat(self._index - 1) 5524 return None 5525 5526 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 5527 if self._match(TokenType.CONSTRAINT): 5528 this = self._parse_id_var() 5529 else: 5530 this = None 5531 5532 if self._match_texts(self.CONSTRAINT_PARSERS): 5533 return self.expression( 5534 exp.ColumnConstraint, 5535 this=this, 5536 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 5537 ) 5538 5539 return this 5540 5541 def _parse_constraint(self) -> t.Optional[exp.Expression]: 5542 if not self._match(TokenType.CONSTRAINT): 5543 
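# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# The identity options parsed above collect onto a single constraint node.
# The DDL and the Postgres dialect choice are assumptions:
from sqlglot import exp, parse_one

ddl = "CREATE TABLE t (id INT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 2))"
gen = parse_one(ddl, read="postgres").find(exp.GeneratedAsIdentityColumnConstraint)
print(gen.args.get("start"), gen.args.get("increment"))  # 1 2
# --- end sketch ----------------------------------------------------------------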
return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 5544 5545 return self.expression( 5546 exp.Constraint, 5547 this=self._parse_id_var(), 5548 expressions=self._parse_unnamed_constraints(), 5549 ) 5550 5551 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 5552 constraints = [] 5553 while True: 5554 constraint = self._parse_unnamed_constraint() or self._parse_function() 5555 if not constraint: 5556 break 5557 constraints.append(constraint) 5558 5559 return constraints 5560 5561 def _parse_unnamed_constraint( 5562 self, constraints: t.Optional[t.Collection[str]] = None 5563 ) -> t.Optional[exp.Expression]: 5564 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 5565 constraints or self.CONSTRAINT_PARSERS 5566 ): 5567 return None 5568 5569 constraint = self._prev.text.upper() 5570 if constraint not in self.CONSTRAINT_PARSERS: 5571 self.raise_error(f"No parser found for schema constraint {constraint}.") 5572 5573 return self.CONSTRAINT_PARSERS[constraint](self) 5574 5575 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 5576 return self._parse_id_var(any_token=False) 5577 5578 def _parse_unique(self) -> exp.UniqueColumnConstraint: 5579 self._match_text_seq("KEY") 5580 return self.expression( 5581 exp.UniqueColumnConstraint, 5582 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 5583 this=self._parse_schema(self._parse_unique_key()), 5584 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 5585 on_conflict=self._parse_on_conflict(), 5586 ) 5587 5588 def _parse_key_constraint_options(self) -> t.List[str]: 5589 options = [] 5590 while True: 5591 if not self._curr: 5592 break 5593 5594 if self._match(TokenType.ON): 5595 action = None 5596 on = self._advance_any() and self._prev.text 5597 5598 if self._match_text_seq("NO", "ACTION"): 5599 action = "NO ACTION" 5600 elif self._match_text_seq("CASCADE"): 5601 action = "CASCADE" 5602 elif self._match_text_seq("RESTRICT"): 5603 action = "RESTRICT" 5604 elif self._match_pair(TokenType.SET, TokenType.NULL): 5605 action = "SET NULL" 5606 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 5607 action = "SET DEFAULT" 5608 else: 5609 self.raise_error("Invalid key constraint") 5610 5611 options.append(f"ON {on} {action}") 5612 else: 5613 var = self._parse_var_from_options( 5614 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 5615 ) 5616 if not var: 5617 break 5618 options.append(var.name) 5619 5620 return options 5621 5622 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 5623 if match and not self._match(TokenType.REFERENCES): 5624 return None 5625 5626 expressions = None 5627 this = self._parse_table(schema=True) 5628 options = self._parse_key_constraint_options() 5629 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 5630 5631 def _parse_foreign_key(self) -> exp.ForeignKey: 5632 expressions = self._parse_wrapped_id_vars() 5633 reference = self._parse_references() 5634 options = {} 5635 5636 while self._match(TokenType.ON): 5637 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 5638 self.raise_error("Expected DELETE or UPDATE") 5639 5640 kind = self._prev.text.lower() 5641 5642 if self._match_text_seq("NO", "ACTION"): 5643 action = "NO ACTION" 5644 elif self._match(TokenType.SET): 5645 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 5646 action = "SET " + self._prev.text.upper() 5647 else: 5648 self._advance() 5649 action = 
self._prev.text.upper() 5650 5651 options[kind] = action 5652 5653 return self.expression( 5654 exp.ForeignKey, 5655 expressions=expressions, 5656 reference=reference, 5657 **options, # type: ignore 5658 ) 5659 5660 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 5661 return self._parse_field() 5662 5663 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 5664 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 5665 self._retreat(self._index - 1) 5666 return None 5667 5668 id_vars = self._parse_wrapped_id_vars() 5669 return self.expression( 5670 exp.PeriodForSystemTimeConstraint, 5671 this=seq_get(id_vars, 0), 5672 expression=seq_get(id_vars, 1), 5673 ) 5674 5675 def _parse_primary_key( 5676 self, wrapped_optional: bool = False, in_props: bool = False 5677 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 5678 desc = ( 5679 self._match_set((TokenType.ASC, TokenType.DESC)) 5680 and self._prev.token_type == TokenType.DESC 5681 ) 5682 5683 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 5684 return self.expression(exp.PrimaryKeyColumnConstraint, desc=desc) 5685 5686 expressions = self._parse_wrapped_csv( 5687 self._parse_primary_key_part, optional=wrapped_optional 5688 ) 5689 options = self._parse_key_constraint_options() 5690 return self.expression(exp.PrimaryKey, expressions=expressions, options=options) 5691 5692 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 5693 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 5694 5695 def _parse_odbc_datetime_literal(self) -> exp.Expression: 5696 """ 5697 Parses a datetime column in ODBC format. We parse the column into the corresponding 5698 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 5699 same as we did for `DATE('yyyy-mm-dd')`. 
5700 5701 Reference: 5702 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 5703 """ 5704 self._match(TokenType.VAR) 5705 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 5706 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 5707 if not self._match(TokenType.R_BRACE): 5708 self.raise_error("Expected }") 5709 return expression 5710 5711 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5712 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 5713 return this 5714 5715 bracket_kind = self._prev.token_type 5716 if ( 5717 bracket_kind == TokenType.L_BRACE 5718 and self._curr 5719 and self._curr.token_type == TokenType.VAR 5720 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 5721 ): 5722 return self._parse_odbc_datetime_literal() 5723 5724 expressions = self._parse_csv( 5725 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 5726 ) 5727 5728 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 5729 self.raise_error("Expected ]") 5730 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 5731 self.raise_error("Expected }") 5732 5733 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 5734 if bracket_kind == TokenType.L_BRACE: 5735 this = self.expression(exp.Struct, expressions=self._kv_to_prop_eq(expressions)) 5736 elif not this: 5737 this = build_array_constructor( 5738 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 5739 ) 5740 else: 5741 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 5742 if constructor_type: 5743 return build_array_constructor( 5744 constructor_type, 5745 args=expressions, 5746 bracket_kind=bracket_kind, 5747 dialect=self.dialect, 5748 ) 5749 5750 expressions = apply_index_offset(this, expressions, -self.dialect.INDEX_OFFSET) 5751 this = self.expression(exp.Bracket, this=this, expressions=expressions) 5752 5753 self._add_comments(this) 5754 return self._parse_bracket(this) 5755 5756 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5757 if self._match(TokenType.COLON): 5758 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 5759 return this 5760 5761 def _parse_case(self) -> t.Optional[exp.Expression]: 5762 ifs = [] 5763 default = None 5764 5765 comments = self._prev_comments 5766 expression = self._parse_assignment() 5767 5768 while self._match(TokenType.WHEN): 5769 this = self._parse_assignment() 5770 self._match(TokenType.THEN) 5771 then = self._parse_assignment() 5772 ifs.append(self.expression(exp.If, this=this, true=then)) 5773 5774 if self._match(TokenType.ELSE): 5775 default = self._parse_assignment() 5776 5777 if not self._match(TokenType.END): 5778 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 5779 default = exp.column("interval") 5780 else: 5781 self.raise_error("Expected END after CASE", self._prev) 5782 5783 return self.expression( 5784 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 5785 ) 5786 5787 def _parse_if(self) -> t.Optional[exp.Expression]: 5788 if self._match(TokenType.L_PAREN): 5789 args = self._parse_csv(self._parse_assignment) 5790 this = self.validate_expression(exp.If.from_arg_list(args), args) 5791 self._match_r_paren() 5792 else: 5793 index = self._index - 1 5794 5795 if self.NO_PAREN_IF_COMMANDS and index == 0: 5796 
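# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# _parse_bracket disambiguates L_BRACE into DuckDB struct literals vs. ODBC
# datetime literals, as above. Both sample queries are assumptions:
from sqlglot import exp, parse_one

print(parse_one("SELECT {'a': 1, 'b': 2}", read="duckdb").find(exp.Struct))
print(parse_one("SELECT {d '2024-01-01'}").find(exp.Date))  # {d '...'} -> date node
# --- end sketch ----------------------------------------------------------------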
return self._parse_as_command(self._prev) 5797 5798 condition = self._parse_assignment() 5799 5800 if not condition: 5801 self._retreat(index) 5802 return None 5803 5804 self._match(TokenType.THEN) 5805 true = self._parse_assignment() 5806 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 5807 self._match(TokenType.END) 5808 this = self.expression(exp.If, this=condition, true=true, false=false) 5809 5810 return this 5811 5812 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 5813 if not self._match_text_seq("VALUE", "FOR"): 5814 self._retreat(self._index - 1) 5815 return None 5816 5817 return self.expression( 5818 exp.NextValueFor, 5819 this=self._parse_column(), 5820 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 5821 ) 5822 5823 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 5824 this = self._parse_function() or self._parse_var_or_string(upper=True) 5825 5826 if self._match(TokenType.FROM): 5827 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5828 5829 if not self._match(TokenType.COMMA): 5830 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 5831 5832 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 5833 5834 def _parse_gap_fill(self) -> exp.GapFill: 5835 self._match(TokenType.TABLE) 5836 this = self._parse_table() 5837 5838 self._match(TokenType.COMMA) 5839 args = [this, *self._parse_csv(self._parse_lambda)] 5840 5841 gap_fill = exp.GapFill.from_arg_list(args) 5842 return self.validate_expression(gap_fill, args) 5843 5844 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 5845 this = self._parse_assignment() 5846 5847 if not self._match(TokenType.ALIAS): 5848 if self._match(TokenType.COMMA): 5849 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 5850 5851 self.raise_error("Expected AS after CAST") 5852 5853 fmt = None 5854 to = self._parse_types() 5855 5856 if self._match(TokenType.FORMAT): 5857 fmt_string = self._parse_string() 5858 fmt = self._parse_at_time_zone(fmt_string) 5859 5860 if not to: 5861 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 5862 if to.this in exp.DataType.TEMPORAL_TYPES: 5863 this = self.expression( 5864 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 5865 this=this, 5866 format=exp.Literal.string( 5867 format_time( 5868 fmt_string.this if fmt_string else "", 5869 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 5870 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 5871 ) 5872 ), 5873 safe=safe, 5874 ) 5875 5876 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 5877 this.set("zone", fmt.args["zone"]) 5878 return this 5879 elif not to: 5880 self.raise_error("Expected TYPE after CAST") 5881 elif isinstance(to, exp.Identifier): 5882 to = exp.DataType.build(to.name, udt=True) 5883 elif to.this == exp.DataType.Type.CHAR: 5884 if self._match(TokenType.CHARACTER_SET): 5885 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 5886 5887 return self.expression( 5888 exp.Cast if strict else exp.TryCast, 5889 this=this, 5890 to=to, 5891 format=fmt, 5892 safe=safe, 5893 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 5894 ) 5895 5896 def _parse_string_agg(self) -> exp.Expression: 5897 if self._match(TokenType.DISTINCT): 5898 args: t.List[t.Optional[exp.Expression]] = [ 5899 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 
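# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# CAST(... FORMAT ...) is rewritten into StrToDate/StrToTime so the format can
# be translated, and STRING_AGG keeps ORDER BY/LIMIT attached to its last
# argument. Queries, dialects and the exact outputs are assumptions:
import sqlglot

print(sqlglot.transpile("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD')", read="teradata", write="duckdb")[0])
print(sqlglot.transpile("SELECT STRING_AGG(x, ',' ORDER BY y) FROM t", read="postgres", write="mysql")[0])
# roughly: STRPTIME-based date parsing, and GROUP_CONCAT(x ORDER BY y SEPARATOR ',')
# --- end sketch ----------------------------------------------------------------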
5900 ] 5901 if self._match(TokenType.COMMA): 5902 args.extend(self._parse_csv(self._parse_assignment)) 5903 else: 5904 args = self._parse_csv(self._parse_assignment) # type: ignore 5905 5906 index = self._index 5907 if not self._match(TokenType.R_PAREN) and args: 5908 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 5909 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 5910 args[-1] = self._parse_limit(this=self._parse_order(this=args[-1])) 5911 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 5912 5913 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 5914 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 5915 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 5916 if not self._match_text_seq("WITHIN", "GROUP"): 5917 self._retreat(index) 5918 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 5919 5920 self._match_l_paren() # The corresponding match_r_paren will be called in parse_function (caller) 5921 order = self._parse_order(this=seq_get(args, 0)) 5922 return self.expression(exp.GroupConcat, this=order, separator=seq_get(args, 1)) 5923 5924 def _parse_convert( 5925 self, strict: bool, safe: t.Optional[bool] = None 5926 ) -> t.Optional[exp.Expression]: 5927 this = self._parse_bitwise() 5928 5929 if self._match(TokenType.USING): 5930 to: t.Optional[exp.Expression] = self.expression( 5931 exp.CharacterSet, this=self._parse_var() 5932 ) 5933 elif self._match(TokenType.COMMA): 5934 to = self._parse_types() 5935 else: 5936 to = None 5937 5938 return self.expression(exp.Cast if strict else exp.TryCast, this=this, to=to, safe=safe) 5939 5940 def _parse_decode(self) -> t.Optional[exp.Decode | exp.Case]: 5941 """ 5942 There are generally two variants of the DECODE function: 5943 5944 - DECODE(bin, charset) 5945 - DECODE(expression, search, result [, search, result] ... [, default]) 5946 5947 The second variant will always be parsed into a CASE expression. Note that NULL 5948 needs special treatment, since we need to explicitly check for it with `IS NULL`, 5949 instead of relying on pattern matching. 
5950 """ 5951 args = self._parse_csv(self._parse_assignment) 5952 5953 if len(args) < 3: 5954 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 5955 5956 expression, *expressions = args 5957 if not expression: 5958 return None 5959 5960 ifs = [] 5961 for search, result in zip(expressions[::2], expressions[1::2]): 5962 if not search or not result: 5963 return None 5964 5965 if isinstance(search, exp.Literal): 5966 ifs.append( 5967 exp.If(this=exp.EQ(this=expression.copy(), expression=search), true=result) 5968 ) 5969 elif isinstance(search, exp.Null): 5970 ifs.append( 5971 exp.If(this=exp.Is(this=expression.copy(), expression=exp.Null()), true=result) 5972 ) 5973 else: 5974 cond = exp.or_( 5975 exp.EQ(this=expression.copy(), expression=search), 5976 exp.and_( 5977 exp.Is(this=expression.copy(), expression=exp.Null()), 5978 exp.Is(this=search.copy(), expression=exp.Null()), 5979 copy=False, 5980 ), 5981 copy=False, 5982 ) 5983 ifs.append(exp.If(this=cond, true=result)) 5984 5985 return exp.Case(ifs=ifs, default=expressions[-1] if len(expressions) % 2 == 1 else None) 5986 5987 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 5988 self._match_text_seq("KEY") 5989 key = self._parse_column() 5990 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 5991 self._match_text_seq("VALUE") 5992 value = self._parse_bitwise() 5993 5994 if not key and not value: 5995 return None 5996 return self.expression(exp.JSONKeyValue, this=key, expression=value) 5997 5998 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5999 if not this or not self._match_text_seq("FORMAT", "JSON"): 6000 return this 6001 6002 return self.expression(exp.FormatJson, this=this) 6003 6004 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6005 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 6006 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6007 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6008 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6009 else: 6010 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6011 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6012 6013 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6014 6015 if not empty and not error and not null: 6016 return None 6017 6018 return self.expression( 6019 exp.OnCondition, 6020 empty=empty, 6021 error=error, 6022 null=null, 6023 ) 6024 6025 def _parse_on_handling( 6026 self, on: str, *values: str 6027 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6028 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6029 for value in values: 6030 if self._match_text_seq(value, "ON", on): 6031 return f"{value} ON {on}" 6032 6033 index = self._index 6034 if self._match(TokenType.DEFAULT): 6035 default_value = self._parse_bitwise() 6036 if self._match_text_seq("ON", on): 6037 return default_value 6038 6039 self._retreat(index) 6040 6041 return None 6042 6043 @t.overload 6044 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6046 @t.overload 6047 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
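# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# The n-ary DECODE form builds exactly the NULL-safe CASE constructed above.
# Query, dialects and the precise output are assumptions:
import sqlglot

sql = "SELECT DECODE(x, 1, 'one', NULL, 'none', 'other') FROM t"
print(sqlglot.transpile(sql, read="oracle", write="duckdb")[0])
# roughly: CASE WHEN x = 1 THEN 'one' WHEN x IS NULL THEN 'none' ELSE 'other' END
# --- end sketch ----------------------------------------------------------------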
6048 6049 def _parse_json_object(self, agg=False): 6050 star = self._parse_star() 6051 expressions = ( 6052 [star] 6053 if star 6054 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6055 ) 6056 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6057 6058 unique_keys = None 6059 if self._match_text_seq("WITH", "UNIQUE"): 6060 unique_keys = True 6061 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6062 unique_keys = False 6063 6064 self._match_text_seq("KEYS") 6065 6066 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6067 self._parse_type() 6068 ) 6069 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6070 6071 return self.expression( 6072 exp.JSONObjectAgg if agg else exp.JSONObject, 6073 expressions=expressions, 6074 null_handling=null_handling, 6075 unique_keys=unique_keys, 6076 return_type=return_type, 6077 encoding=encoding, 6078 ) 6079 6080 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6081 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6082 if not self._match_text_seq("NESTED"): 6083 this = self._parse_id_var() 6084 kind = self._parse_types(allow_identifiers=False) 6085 nested = None 6086 else: 6087 this = None 6088 kind = None 6089 nested = True 6090 6091 path = self._match_text_seq("PATH") and self._parse_string() 6092 nested_schema = nested and self._parse_json_schema() 6093 6094 return self.expression( 6095 exp.JSONColumnDef, 6096 this=this, 6097 kind=kind, 6098 path=path, 6099 nested_schema=nested_schema, 6100 ) 6101 6102 def _parse_json_schema(self) -> exp.JSONSchema: 6103 self._match_text_seq("COLUMNS") 6104 return self.expression( 6105 exp.JSONSchema, 6106 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6107 ) 6108 6109 def _parse_json_table(self) -> exp.JSONTable: 6110 this = self._parse_format_json(self._parse_bitwise()) 6111 path = self._match(TokenType.COMMA) and self._parse_string() 6112 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6113 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6114 schema = self._parse_json_schema() 6115 6116 return exp.JSONTable( 6117 this=this, 6118 schema=schema, 6119 path=path, 6120 error_handling=error_handling, 6121 empty_handling=empty_handling, 6122 ) 6123 6124 def _parse_match_against(self) -> exp.MatchAgainst: 6125 expressions = self._parse_csv(self._parse_column) 6126 6127 self._match_text_seq(")", "AGAINST", "(") 6128 6129 this = self._parse_string() 6130 6131 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6132 modifier = "IN NATURAL LANGUAGE MODE" 6133 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6134 modifier = f"{modifier} WITH QUERY EXPANSION" 6135 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6136 modifier = "IN BOOLEAN MODE" 6137 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6138 modifier = "WITH QUERY EXPANSION" 6139 else: 6140 modifier = None 6141 6142 return self.expression( 6143 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6144 ) 6145 6146 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6147 def _parse_open_json(self) -> exp.OpenJSON: 6148 this = self._parse_bitwise() 6149 path = self._match(TokenType.COMMA) and self._parse_string() 6150 6151 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6152 this = self._parse_field(any_token=True) 6153 kind = self._parse_types() 6154 path = 
self._parse_string() 6155 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6156 6157 return self.expression( 6158 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6159 ) 6160 6161 expressions = None 6162 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6163 self._match_l_paren() 6164 expressions = self._parse_csv(_parse_open_json_column_def) 6165 6166 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6167 6168 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6169 args = self._parse_csv(self._parse_bitwise) 6170 6171 if self._match(TokenType.IN): 6172 return self.expression( 6173 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6174 ) 6175 6176 if haystack_first: 6177 haystack = seq_get(args, 0) 6178 needle = seq_get(args, 1) 6179 else: 6180 needle = seq_get(args, 0) 6181 haystack = seq_get(args, 1) 6182 6183 return self.expression( 6184 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6185 ) 6186 6187 def _parse_predict(self) -> exp.Predict: 6188 self._match_text_seq("MODEL") 6189 this = self._parse_table() 6190 6191 self._match(TokenType.COMMA) 6192 self._match_text_seq("TABLE") 6193 6194 return self.expression( 6195 exp.Predict, 6196 this=this, 6197 expression=self._parse_table(), 6198 params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(), 6199 ) 6200 6201 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6202 args = self._parse_csv(self._parse_table) 6203 return exp.JoinHint(this=func_name.upper(), expressions=args) 6204 6205 def _parse_substring(self) -> exp.Substring: 6206 # Postgres supports the form: substring(string [from int] [for int]) 6207 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6208 6209 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6210 6211 if self._match(TokenType.FROM): 6212 args.append(self._parse_bitwise()) 6213 if self._match(TokenType.FOR): 6214 if len(args) == 1: 6215 args.append(exp.Literal.number(1)) 6216 args.append(self._parse_bitwise()) 6217 6218 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6219 6220 def _parse_trim(self) -> exp.Trim: 6221 # https://www.w3resource.com/sql/character-functions/trim.php 6222 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6223 6224 position = None 6225 collation = None 6226 expression = None 6227 6228 if self._match_texts(self.TRIM_TYPES): 6229 position = self._prev.text.upper() 6230 6231 this = self._parse_bitwise() 6232 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6233 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6234 expression = self._parse_bitwise() 6235 6236 if invert_order: 6237 this, expression = expression, this 6238 6239 if self._match(TokenType.COLLATE): 6240 collation = self._parse_bitwise() 6241 6242 return self.expression( 6243 exp.Trim, this=this, position=position, expression=expression, collation=collation 6244 ) 6245 6246 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6247 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6248 6249 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6250 return self._parse_window(self._parse_id_var(), alias=True) 6251 6252 def _parse_respect_or_ignore_nulls( 6253 self, this: t.Optional[exp.Expression] 6254 ) -> t.Optional[exp.Expression]: 6255 if self._match_text_seq("IGNORE", "NULLS"): 
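# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# SUBSTRING FROM/FOR and TRIM(LEADING ... FROM ...) normalize to the same AST
# as their comma-separated forms. Dialect choices are assumptions:
import sqlglot

print(sqlglot.transpile("SELECT SUBSTRING('abc' FROM 2 FOR 1)", read="postgres", write="spark")[0])
# SELECT SUBSTRING('abc', 2, 1)
print(sqlglot.parse_one("SELECT TRIM(LEADING 'x' FROM col)").sql())
# the position and swapped operands round-trip losslessly
# --- end sketch ----------------------------------------------------------------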
6256 return self.expression(exp.IgnoreNulls, this=this) 6257 if self._match_text_seq("RESPECT", "NULLS"): 6258 return self.expression(exp.RespectNulls, this=this) 6259 return this 6260 6261 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6262 if self._match(TokenType.HAVING): 6263 self._match_texts(("MAX", "MIN")) 6264 max = self._prev.text.upper() != "MIN" 6265 return self.expression( 6266 exp.HavingMax, this=this, expression=self._parse_column(), max=max 6267 ) 6268 6269 return this 6270 6271 def _parse_window( 6272 self, this: t.Optional[exp.Expression], alias: bool = False 6273 ) -> t.Optional[exp.Expression]: 6274 func = this 6275 comments = func.comments if isinstance(func, exp.Expression) else None 6276 6277 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 6278 self._match(TokenType.WHERE) 6279 this = self.expression( 6280 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 6281 ) 6282 self._match_r_paren() 6283 6284 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 6285 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 6286 if self._match_text_seq("WITHIN", "GROUP"): 6287 order = self._parse_wrapped(self._parse_order) 6288 this = self.expression(exp.WithinGroup, this=this, expression=order) 6289 6290 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 6291 # Some dialects choose to implement and some do not. 6292 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 6293 6294 # There is some code above in _parse_lambda that handles 6295 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 6296 6297 # The below changes handle 6298 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 6299 6300 # Oracle allows both formats 6301 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 6302 # and Snowflake chose to do the same for familiarity 6303 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 6304 if isinstance(this, exp.AggFunc): 6305 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 6306 6307 if ignore_respect and ignore_respect is not this: 6308 ignore_respect.replace(ignore_respect.this) 6309 this = self.expression(ignore_respect.__class__, this=this) 6310 6311 this = self._parse_respect_or_ignore_nulls(this) 6312 6313 # bigquery select from window x AS (partition by ...) 
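# --- illustrative sketch (annotation, not part of the sqlglot source) ---------
# Both placements of IGNORE NULLS normalize to the same IgnoreNulls-wrapped
# aggregate, so each dialect can render its preferred form. Sample SQL and the
# exact BigQuery output are assumptions:
from sqlglot import parse_one

q = parse_one("SELECT FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y) FROM t", read="snowflake")
print(q.sql("bigquery"))  # roughly FIRST_VALUE(x IGNORE NULLS) OVER (ORDER BY y)
# --- end sketch ----------------------------------------------------------------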
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self.expression(exp.Identifier, this=self._prev.text, quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        return exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self.expression(exp.Identifier, this=self._prev.text, quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_expression() if alias else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_add_column(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("ADD"):
            return None

        self._match(TokenType.COLUMN)
        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if expression:
            expression.set("exists", exists_column)

            # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
            if self._match_texts(("FIRST", "AFTER")):
                position = self._prev.text
                column_position = self.expression(
                    exp.ColumnPosition, this=self._parse_column(), position=position
                )
                expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        index = self._index - 1

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            return self._parse_csv(
                lambda: self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )
            )

        self._retreat(index)
        if not self.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN and self._match_text_seq("ADD"):
            return self._parse_wrapped_csv(self._parse_field_def, optional=True)

        if self._match_text_seq("ADD", "COLUMNS"):
            schema = self._parse_schema()
            if schema:
                return [schema]
            return []

        return self._parse_wrapped_csv(self._parse_add_column, optional=True)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.RenameTable | exp.RenameColumn]:
        if self._match(TokenType.COLUMN):
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.RenameTable, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            alter_set.set("expressions", [self._parse_properties()])

        return alter_set

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            expressions=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> t.List[exp.When]:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW") if self._match_text_seq("ROW") else self._parse_value(),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return whens

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()

                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL,
                # so we parse it separately to use _parse_field()
                prop = self.expression(
                    exp.Property, this=exp.var("FORMAT_NAME"), value=self._parse_field()
                )
                opts.append(prop)
            else:
                opts.append(self._parse_property())

            self._match(TokenType.COMMA)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        )

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
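For reference, a minimal sketch of constructing a Parser by hand; most callers go through sqlglot.parse or sqlglot.parse_one instead, and the dialect name here is just an example:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# Collect up to five errors and raise them together once parsing finishes,
# instead of raising on the first one (the IMMEDIATE default).
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")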
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
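A minimal sketch of the tokenize-then-parse flow, assuming the default dialect's tokenizer:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

# One syntax tree per semicolon-separated statement in the input.
for tree in Parser().parse(tokens, sql):
    print(tree.sql())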
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
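For example, forcing the tokens to be parsed as a SELECT statement; a sketch, in that a non-SELECT input would instead raise a ParseError annotated with the attempted expression type:

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1"
# Succeeds, because the tokens form a valid SELECT statement.
select = Parser().parse_into(exp.Select, Tokenizer().tokenize(sql), sql)[0]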
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
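A sketch of the WARN path: with ErrorLevel.WARN, errors found while parsing are logged through the module logger and kept on parser.errors rather than raised (assuming the unbalanced parenthesis below trips an error):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT (1"  # unbalanced parenthesis, assumed to trip "Expecting )"
parser = Parser(error_level=ErrorLevel.WARN)
parser.parse(Tokenizer().tokenize(sql), sql)

# check_errors() already ran inside parse(): each error was logged via
# logger.error() rather than raised, and remains inspectable afterwards.
print(parser.errors)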
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
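Custom dialects' parser hooks typically call raise_error when a required token is missing; the parse method below is hypothetical, purely for illustration:

from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_magic(self):  # hypothetical helper, not part of sqlglot
        if not self._match_text_seq("MAGIC"):
            # The error is anchored to the current token, and the message
            # includes error_message_context characters of surrounding SQL.
            self.raise_error("Expected MAGIC keyword")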
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
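A sketch of how a subclassed parser hook might use this (the method name is hypothetical): compared to instantiating the exp class directly, expression() also attaches any pending comments and validates mandatory arguments:

from sqlglot import exp
from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_negated_column(self):  # hypothetical helper, not part of sqlglot
        # Builds exp.Not(this=<column>), attaching buffered comments and
        # validating that the mandatory "this" argument was set.
        return self.expression(exp.Not, this=self._parse_column())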
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
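A minimal sketch of the failure mode, assuming exp.Alias declares "this" as a mandatory argument: under ErrorLevel.RAISE the violation is only recorded here and raised later, when check_errors() runs:

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
# "this" is missing, so an error message is recorded on parser.errors.
parser.validate_expression(exp.Alias(alias="x"))
print(parser.errors)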