sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import (
    ErrorLevel,
    ParseError,
    TokenError,
    concat_messages,
    highlight_sql,
    merge_errors,
)
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
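
# NOTE: Illustrative sketch, not part of sqlglot. The helpers above are plain
# callables over an argument list (plus, for some, a Dialect instance), which is
# how the FUNCTIONS table below invokes them. The function name
# `_example_builders` is hypothetical.
def _example_builders() -> None:
    from sqlglot.dialects.dialect import Dialect

    # LOG(2, x): with the default base-first argument order, the first argument
    # remains the base, so we get an exp.Log node rather than exp.Ln.
    log = build_logarithm(
        [exp.Literal.number(2), exp.column("x")], Dialect.get_or_raise("postgres")
    )
    assert isinstance(log, exp.Log)

    # VAR_MAP pairs a flat argument list into alternating keys and values.
    var_map = build_var_map([exp.Literal.string("a"), exp.Literal.number(1)])
    assert isinstance(var_map, exp.VarMap)
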
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True, reverse_args: bool = False):
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if expression and reverse_args:
        this, expression = expression, this

    return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
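
# NOTE: Illustrative sketch, not part of sqlglot. The metaclass above pre-computes
# prefix tries over the multi-word keys of SHOW_PARSERS/SET_PARSERS, so SHOW/SET
# statements can be matched one keyword at a time. The helper name is hypothetical.
def _example_trie() -> None:
    trie = new_trie(key.split(" ") for key in ("SHOW TABLES", "SHOW COLUMNS"))
    result, _ = in_trie(trie, ["SHOW"])
    assert result == TrieResult.PREFIX  # "SHOW" is a prefix of a longer key
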
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GENERATE_UUID": lambda args, dialect: exp.Uuid(
            is_string=dialect.UUID_IS_STRING_TYPE or None
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "GREATEST": lambda args, dialect: exp.Greatest(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "LEAST": lambda args, dialect: exp.Least(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
        "VAR_MAP": build_var_map,
    }
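
    # NOTE: Illustrative sketch, not part of sqlglot. Dialects customize parsing
    # by extending this table in their Parser subclass; e.g. a hypothetical
    # dialect mapping its TO_STR function onto exp.Cast could do:
    #
    #     class MyParser(Parser):
    #         FUNCTIONS = {
    #             **Parser.FUNCTIONS,
    #             "TO_STR": lambda args: exp.Cast(
    #                 this=seq_get(args, 0), to=exp.DataType.build("text")
    #             ),
    #         }
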
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
        TokenType.LOCALTIME: exp.Localtime,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
        TokenType.CURRENT_ROLE: exp.CurrentRole,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.FILE,
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.BIGNUM,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIME_NS,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.DECFLOAT,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ANALYZE,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.LOCK,
        TokenType.MATCH,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVER,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *ALTERABLES,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.CURRENT_CATALOG,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.LOCALTIME,
        TokenType.LOCALTIMESTAMP,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.SESSION_USER,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}
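
    # NOTE: Illustrative sketch, not part of sqlglot. The dictionaries above drive
    # a classic precedence-climbing parse; FACTOR binds tighter than TERM, so:
    #
    #     import sqlglot
    #     node = sqlglot.parse_one("SELECT 1 + 2 * 3").expressions[0]
    #     assert isinstance(node, exp.Add)
    #     assert isinstance(node.expression, exp.Mul)
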
    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_disjunction(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_disjunction(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
            scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }
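
    # NOTE: Illustrative sketch, not part of sqlglot. COLUMN_OPERATORS wires
    # postfix operators into builders; e.g. in Postgres, `->` becomes a JSON
    # extraction node:
    #
    #     import sqlglot
    #     expr = sqlglot.parse_one("SELECT data -> 'k' FROM t", read="postgres")
    #     assert expr.find(exp.JSONExtract) is not None
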
    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()),
        exp.Condition: lambda self: self._parse_disjunction(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            token=token,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString,
            token=token,
            is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            token=token,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }
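
    # NOTE: Illustrative sketch, not part of sqlglot. RANGE_PARSERS handles
    # operators that follow an already-parsed operand, such as BETWEEN and IN:
    #
    #     import sqlglot
    #     query = sqlglot.parse_one("SELECT * FROM t WHERE x BETWEEN 1 AND 2")
    #     assert query.find(exp.Between) is not None
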
    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
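
    # NOTE: Illustrative sketch, not part of sqlglot. CONSTRAINT_PARSERS covers the
    # column/table constraints encountered while parsing a schema definition:
    #
    #     import sqlglot
    #     ddl = sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY)")
    #     assert ddl.find(exp.PrimaryKeyColumnConstraint) is not None
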
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parentheses after the keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized into the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
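
    # NOTE: Illustrative sketch, not part of sqlglot. FUNCTION_PARSERS handles
    # functions whose call syntax is non-standard and can't be expressed as a
    # plain argument list, e.g. CAST's `AS <type>` clause:
    #
    #     import sqlglot
    #     cast = sqlglot.parse_one("SELECT CAST(x AS INT)").expressions[0]
    #     assert isinstance(cast, exp.Cast)
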
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether ALTER statements are allowed to contain PARTITION specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINs without join criteria;
    # adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False
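
    # NOTE: Illustrative sketch, not part of sqlglot. Dialect parsers usually tune
    # behavior by flipping these class-level flags rather than overriding methods,
    # e.g. a hypothetical dialect that parses LOG(x) as LN(x):
    #
    #     class MyParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True
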
    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
1661 1662 Args: 1663 raw_tokens: The list of tokens. 1664 sql: The original SQL string, used to produce helpful debug messages. 1665 1666 Returns: 1667 The list of the produced syntax trees. 1668 """ 1669 return self._parse( 1670 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1671 ) 1672 1673 def parse_into( 1674 self, 1675 expression_types: exp.IntoType, 1676 raw_tokens: t.List[Token], 1677 sql: t.Optional[str] = None, 1678 ) -> t.List[t.Optional[exp.Expression]]: 1679 """ 1680 Parses a list of tokens into a given Expression type. If a collection of Expression 1681 types is given instead, this method will try to parse the token list into each one 1682 of them, stopping at the first for which the parsing succeeds. 1683 1684 Args: 1685 expression_types: The expression type(s) to try and parse the token list into. 1686 raw_tokens: The list of tokens. 1687 sql: The original SQL string, used to produce helpful debug messages. 1688 1689 Returns: 1690 The target Expression. 1691 """ 1692 errors = [] 1693 for expression_type in ensure_list(expression_types): 1694 parser = self.EXPRESSION_PARSERS.get(expression_type) 1695 if not parser: 1696 raise TypeError(f"No parser registered for {expression_type}") 1697 1698 try: 1699 return self._parse(parser, raw_tokens, sql) 1700 except ParseError as e: 1701 e.errors[0]["into_expression"] = expression_type 1702 errors.append(e) 1703 1704 raise ParseError( 1705 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1706 errors=merge_errors(errors), 1707 ) from errors[-1] 1708 1709 def _parse( 1710 self, 1711 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1712 raw_tokens: t.List[Token], 1713 sql: t.Optional[str] = None, 1714 ) -> t.List[t.Optional[exp.Expression]]: 1715 self.reset() 1716 self.sql = sql or "" 1717 1718 total = len(raw_tokens) 1719 chunks: t.List[t.List[Token]] = [[]] 1720 1721 for i, token in enumerate(raw_tokens): 1722 if token.token_type == TokenType.SEMICOLON: 1723 if token.comments: 1724 chunks.append([token]) 1725 1726 if i < total - 1: 1727 chunks.append([]) 1728 else: 1729 chunks[-1].append(token) 1730 1731 expressions = [] 1732 1733 for tokens in chunks: 1734 self._index = -1 1735 self._tokens = tokens 1736 self._advance() 1737 1738 expressions.append(parse_method(self)) 1739 1740 if self._index < len(self._tokens): 1741 self.raise_error("Invalid expression / Unexpected token") 1742 1743 self.check_errors() 1744 1745 return expressions 1746 1747 def check_errors(self) -> None: 1748 """Logs or raises any found errors, depending on the chosen error level setting.""" 1749 if self.error_level == ErrorLevel.WARN: 1750 for error in self.errors: 1751 logger.error(str(error)) 1752 elif self.error_level == ErrorLevel.RAISE and self.errors: 1753 raise ParseError( 1754 concat_messages(self.errors, self.max_errors), 1755 errors=merge_errors(self.errors), 1756 ) 1757 1758 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1759 """ 1760 Appends an error in the list of recorded errors or raises it, depending on the chosen 1761 error level setting. 1762 """ 1763 token = token or self._curr or self._prev or Token.string("") 1764 formatted_sql, start_context, highlight, end_context = highlight_sql( 1765 sql=self.sql, 1766 positions=[(token.start, token.end)], 1767 context_length=self.error_message_context, 1768 ) 1769 formatted_message = f"{message}. 
Line {token.line}, Col: {token.col}.\n {formatted_sql}"

        error = ParseError.new(
            formatted_message,
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self,
        exp_class: t.Type[E],
        token: t.Optional[Token] = None,
        comments: t.Optional[t.List[str]] = None,
        **kwargs,
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            token: An optional token; if provided, its text becomes the expression's `this`
                arg and its position metadata is copied onto the expression.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        if token:
            instance = exp_class(this=token.text, **kwargs)
            instance.update_positions(token)
        else:
            instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )
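
    # Error-handling sketch for the helpers above (public API only; the query
    # string is just an arbitrary invalid example): with ErrorLevel.RAISE the
    # errors collected by raise_error are merged into a single ParseError by
    # check_errors, and e.errors carries one dict per failure:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot.errors import ErrorLevel, ParseError
    #     >>> try:
    #     ...     sqlglot.parse("SELECT 1 +", error_level=ErrorLevel.RAISE)
    #     ... except ParseError as e:
    #     ...     print(len(e.errors) >= 1)
    #     True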

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None
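
        # Dispatch sketch (public API; object names are placeholders): a leading
        # token with a registered statement parser yields a typed node, e.g.
        # COMMENT ON routes to _parse_comment above:
        #
        #     >>> import sqlglot
        #     >>> from sqlglot import exp
        #     >>> stmt = sqlglot.parse_one("COMMENT ON TABLE db.t IS 'fact table'")
        #     >>> isinstance(stmt, exp.Comment), stmt.args.get("kind")
        #     (True, 'TABLE')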
        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None
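
        # DDL sketch (public API; names are placeholders): the flags gathered
        # above surface as args on exp.Create / exp.Drop, e.g.:
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("CREATE OR REPLACE VIEW v AS SELECT 1").args.get("replace")
        #     True
        #     >>> drop = sqlglot.parse_one("DROP TABLE IF EXISTS a.b CASCADE")
        #     >>> drop.args.get("exists"), drop.args.get("kind"), drop.args.get("cascade")
        #     (True, 'TABLE', True)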
        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
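
            # Sequence sketch (public API; the name and options are placeholders):
            # the SequenceProperties merging above means a statement such as
            #
            #     >>> import sqlglot
            #     >>> seq = sqlglot.parse_one("CREATE SEQUENCE s START WITH 1 INCREMENT BY 2")
            #     >>> seq.args.get("kind")
            #     'SEQUENCE'
            #
            # should carry its START/INCREMENT options on a single
            # exp.SequenceProperties node.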
2173 if not expression and has_alias: 2174 expression = self._try_parse(self._parse_table_parts) 2175 2176 if create_token.token_type == TokenType.TABLE: 2177 # exp.Properties.Location.POST_EXPRESSION 2178 extend_props(self._parse_properties()) 2179 2180 indexes = [] 2181 while True: 2182 index = self._parse_index() 2183 2184 # exp.Properties.Location.POST_INDEX 2185 extend_props(self._parse_properties()) 2186 if not index: 2187 break 2188 else: 2189 self._match(TokenType.COMMA) 2190 indexes.append(index) 2191 elif create_token.token_type == TokenType.VIEW: 2192 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2193 no_schema_binding = True 2194 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2195 extend_props(self._parse_properties()) 2196 2197 shallow = self._match_text_seq("SHALLOW") 2198 2199 if self._match_texts(self.CLONE_KEYWORDS): 2200 copy = self._prev.text.lower() == "copy" 2201 clone = self.expression( 2202 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2203 ) 2204 2205 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2206 return self._parse_as_command(start) 2207 2208 create_kind_text = create_token.text.upper() 2209 return self.expression( 2210 exp.Create, 2211 this=this, 2212 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2213 replace=replace, 2214 refresh=refresh, 2215 unique=unique, 2216 expression=expression, 2217 exists=exists, 2218 properties=properties, 2219 indexes=indexes, 2220 no_schema_binding=no_schema_binding, 2221 begin=begin, 2222 end=end, 2223 clone=clone, 2224 concurrently=concurrently, 2225 clustered=clustered, 2226 ) 2227 2228 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2229 seq = exp.SequenceProperties() 2230 2231 options = [] 2232 index = self._index 2233 2234 while self._curr: 2235 self._match(TokenType.COMMA) 2236 if self._match_text_seq("INCREMENT"): 2237 self._match_text_seq("BY") 2238 self._match_text_seq("=") 2239 seq.set("increment", self._parse_term()) 2240 elif self._match_text_seq("MINVALUE"): 2241 seq.set("minvalue", self._parse_term()) 2242 elif self._match_text_seq("MAXVALUE"): 2243 seq.set("maxvalue", self._parse_term()) 2244 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2245 self._match_text_seq("=") 2246 seq.set("start", self._parse_term()) 2247 elif self._match_text_seq("CACHE"): 2248 # T-SQL allows empty CACHE which is initialized dynamically 2249 seq.set("cache", self._parse_number() or True) 2250 elif self._match_text_seq("OWNED", "BY"): 2251 # "OWNED BY NONE" is the default 2252 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2253 else: 2254 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2255 if opt: 2256 options.append(opt) 2257 else: 2258 break 2259 2260 seq.set("options", options if options else None) 2261 return None if self._index == index else seq 2262 2263 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2264 # only used for teradata currently 2265 self._match(TokenType.COMMA) 2266 2267 kwargs = { 2268 "no": self._match_text_seq("NO"), 2269 "dual": self._match_text_seq("DUAL"), 2270 "before": self._match_text_seq("BEFORE"), 2271 "default": self._match_text_seq("DEFAULT"), 2272 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2273 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2274 "after": self._match_text_seq("AFTER"), 2275 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2276 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2277 } 2278 2279 if self._match_texts(self.PROPERTY_PARSERS): 2280 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2281 try: 2282 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2283 except TypeError: 2284 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2285 2286 return None 2287 2288 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2289 return self._parse_wrapped_csv(self._parse_property) 2290 2291 def _parse_property(self) -> t.Optional[exp.Expression]: 2292 if self._match_texts(self.PROPERTY_PARSERS): 2293 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2294 2295 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2296 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2297 2298 if self._match_text_seq("COMPOUND", "SORTKEY"): 2299 return self._parse_sortkey(compound=True) 2300 2301 if self._match_text_seq("SQL", "SECURITY"): 2302 return self.expression( 2303 exp.SqlSecurityProperty, 2304 this=self._match_texts(("DEFINER", "INVOKER")) and self._prev.text.upper(), 2305 ) 2306 2307 index = self._index 2308 2309 seq_props = self._parse_sequence_properties() 2310 if seq_props: 2311 return seq_props 2312 2313 self._retreat(index) 2314 key = self._parse_column() 2315 2316 if not self._match(TokenType.EQ): 2317 self._retreat(index) 2318 return None 2319 2320 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2321 if isinstance(key, exp.Column): 2322 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2323 2324 value = self._parse_bitwise() or self._parse_var(any_token=True) 2325 2326 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2327 if isinstance(value, exp.Column): 2328 value = exp.var(value.name) 2329 2330 return self.expression(exp.Property, this=key, value=value) 2331 2332 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2333 if self._match_text_seq("BY"): 2334 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2335 2336 self._match(TokenType.ALIAS) 2337 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2338 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2339 2340 return self.expression( 2341 exp.FileFormatProperty, 2342 this=( 2343 self.expression( 2344 exp.InputOutputFormat, 2345 input_format=input_format, 2346 output_format=output_format, 2347 ) 2348 if input_format or output_format 2349 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2350 ), 2351 hive_format=True, 2352 ) 2353 2354 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2355 field = self._parse_field() 2356 if isinstance(field, exp.Identifier) and not field.quoted: 2357 field = exp.var(field) 2358 2359 return field 2360 2361 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2362 self._match(TokenType.EQ) 2363 self._match(TokenType.ALIAS) 2364 2365 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2366 2367 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2368 properties = [] 2369 while True: 2370 if before: 2371 prop = self._parse_property_before() 2372 else: 2373 prop = self._parse_property() 2374 if not prop: 2375 break 2376 for 
p in ensure_list(prop): 2377 properties.append(p) 2378 2379 if properties: 2380 return self.expression(exp.Properties, expressions=properties) 2381 2382 return None 2383 2384 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2385 return self.expression( 2386 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2387 ) 2388 2389 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2390 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2391 security_specifier = self._prev.text.upper() 2392 return self.expression(exp.SecurityProperty, this=security_specifier) 2393 return None 2394 2395 def _parse_settings_property(self) -> exp.SettingsProperty: 2396 return self.expression( 2397 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2398 ) 2399 2400 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2401 if self._index >= 2: 2402 pre_volatile_token = self._tokens[self._index - 2] 2403 else: 2404 pre_volatile_token = None 2405 2406 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2407 return exp.VolatileProperty() 2408 2409 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2410 2411 def _parse_retention_period(self) -> exp.Var: 2412 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2413 number = self._parse_number() 2414 number_str = f"{number} " if number else "" 2415 unit = self._parse_var(any_token=True) 2416 return exp.var(f"{number_str}{unit}") 2417 2418 def _parse_system_versioning_property( 2419 self, with_: bool = False 2420 ) -> exp.WithSystemVersioningProperty: 2421 self._match(TokenType.EQ) 2422 prop = self.expression( 2423 exp.WithSystemVersioningProperty, 2424 on=True, 2425 with_=with_, 2426 ) 2427 2428 if self._match_text_seq("OFF"): 2429 prop.set("on", False) 2430 return prop 2431 2432 self._match(TokenType.ON) 2433 if self._match(TokenType.L_PAREN): 2434 while self._curr and not self._match(TokenType.R_PAREN): 2435 if self._match_text_seq("HISTORY_TABLE", "="): 2436 prop.set("this", self._parse_table_parts()) 2437 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2438 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2439 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2440 prop.set("retention_period", self._parse_retention_period()) 2441 2442 self._match(TokenType.COMMA) 2443 2444 return prop 2445 2446 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2447 self._match(TokenType.EQ) 2448 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2449 prop = self.expression(exp.DataDeletionProperty, on=on) 2450 2451 if self._match(TokenType.L_PAREN): 2452 while self._curr and not self._match(TokenType.R_PAREN): 2453 if self._match_text_seq("FILTER_COLUMN", "="): 2454 prop.set("filter_column", self._parse_column()) 2455 elif self._match_text_seq("RETENTION_PERIOD", "="): 2456 prop.set("retention_period", self._parse_retention_period()) 2457 2458 self._match(TokenType.COMMA) 2459 2460 return prop 2461 2462 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2463 kind = "HASH" 2464 expressions: t.Optional[t.List[exp.Expression]] = None 2465 if self._match_text_seq("BY", "HASH"): 2466 expressions = self._parse_wrapped_csv(self._parse_id_var) 2467 elif self._match_text_seq("BY", "RANDOM"): 2468 kind = "RANDOM" 2469 2470 # If the BUCKETS keyword is not present, the number of buckets is 
AUTO 2471 buckets: t.Optional[exp.Expression] = None 2472 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2473 buckets = self._parse_number() 2474 2475 return self.expression( 2476 exp.DistributedByProperty, 2477 expressions=expressions, 2478 kind=kind, 2479 buckets=buckets, 2480 order=self._parse_order(), 2481 ) 2482 2483 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2484 self._match_text_seq("KEY") 2485 expressions = self._parse_wrapped_id_vars() 2486 return self.expression(expr_type, expressions=expressions) 2487 2488 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2489 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2490 prop = self._parse_system_versioning_property(with_=True) 2491 self._match_r_paren() 2492 return prop 2493 2494 if self._match(TokenType.L_PAREN, advance=False): 2495 return self._parse_wrapped_properties() 2496 2497 if self._match_text_seq("JOURNAL"): 2498 return self._parse_withjournaltable() 2499 2500 if self._match_texts(self.VIEW_ATTRIBUTES): 2501 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2502 2503 if self._match_text_seq("DATA"): 2504 return self._parse_withdata(no=False) 2505 elif self._match_text_seq("NO", "DATA"): 2506 return self._parse_withdata(no=True) 2507 2508 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2509 return self._parse_serde_properties(with_=True) 2510 2511 if self._match(TokenType.SCHEMA): 2512 return self.expression( 2513 exp.WithSchemaBindingProperty, 2514 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2515 ) 2516 2517 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2518 return self.expression( 2519 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2520 ) 2521 2522 if not self._next: 2523 return None 2524 2525 return self._parse_withisolatedloading() 2526 2527 def _parse_procedure_option(self) -> exp.Expression | None: 2528 if self._match_text_seq("EXECUTE", "AS"): 2529 return self.expression( 2530 exp.ExecuteAsProperty, 2531 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2532 or self._parse_string(), 2533 ) 2534 2535 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2536 2537 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2538 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2539 self._match(TokenType.EQ) 2540 2541 user = self._parse_id_var() 2542 self._match(TokenType.PARAMETER) 2543 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2544 2545 if not user or not host: 2546 return None 2547 2548 return exp.DefinerProperty(this=f"{user}@{host}") 2549 2550 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2551 self._match(TokenType.TABLE) 2552 self._match(TokenType.EQ) 2553 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2554 2555 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2556 return self.expression(exp.LogProperty, no=no) 2557 2558 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2559 return self.expression(exp.JournalProperty, **kwargs) 2560 2561 def _parse_checksum(self) -> exp.ChecksumProperty: 2562 self._match(TokenType.EQ) 2563 2564 on = None 2565 if self._match(TokenType.ON): 2566 on = True 2567 elif self._match_text_seq("OFF"): 2568 on = False 2569 2570 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2571 2572 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2573 return self.expression( 2574 exp.Cluster, 2575 expressions=( 2576 self._parse_wrapped_csv(self._parse_ordered) 2577 if wrapped 2578 else self._parse_csv(self._parse_ordered) 2579 ), 2580 ) 2581 2582 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2583 self._match_text_seq("BY") 2584 2585 self._match_l_paren() 2586 expressions = self._parse_csv(self._parse_column) 2587 self._match_r_paren() 2588 2589 if self._match_text_seq("SORTED", "BY"): 2590 self._match_l_paren() 2591 sorted_by = self._parse_csv(self._parse_ordered) 2592 self._match_r_paren() 2593 else: 2594 sorted_by = None 2595 2596 self._match(TokenType.INTO) 2597 buckets = self._parse_number() 2598 self._match_text_seq("BUCKETS") 2599 2600 return self.expression( 2601 exp.ClusteredByProperty, 2602 expressions=expressions, 2603 sorted_by=sorted_by, 2604 buckets=buckets, 2605 ) 2606 2607 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2608 if not self._match_text_seq("GRANTS"): 2609 self._retreat(self._index - 1) 2610 return None 2611 2612 return self.expression(exp.CopyGrantsProperty) 2613 2614 def _parse_freespace(self) -> exp.FreespaceProperty: 2615 self._match(TokenType.EQ) 2616 return self.expression( 2617 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2618 ) 2619 2620 def _parse_mergeblockratio( 2621 self, no: bool = False, default: bool = False 2622 ) -> exp.MergeBlockRatioProperty: 2623 if self._match(TokenType.EQ): 2624 return self.expression( 2625 exp.MergeBlockRatioProperty, 2626 this=self._parse_number(), 2627 percent=self._match(TokenType.PERCENT), 2628 ) 2629 2630 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2631 2632 def _parse_datablocksize( 2633 self, 2634 default: t.Optional[bool] = None, 2635 minimum: t.Optional[bool] = None, 2636 maximum: t.Optional[bool] = None, 2637 ) -> exp.DataBlocksizeProperty: 2638 self._match(TokenType.EQ) 2639 size = self._parse_number() 2640 2641 units = None 2642 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2643 units = self._prev.text 2644 2645 return self.expression( 2646 exp.DataBlocksizeProperty, 2647 size=size, 2648 units=units, 2649 default=default, 2650 minimum=minimum, 2651 maximum=maximum, 2652 ) 2653 2654 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2655 self._match(TokenType.EQ) 2656 always = self._match_text_seq("ALWAYS") 2657 manual = self._match_text_seq("MANUAL") 2658 never = self._match_text_seq("NEVER") 2659 default = self._match_text_seq("DEFAULT") 2660 2661 autotemp = None 2662 if self._match_text_seq("AUTOTEMP"): 2663 autotemp = self._parse_schema() 2664 2665 return self.expression( 2666 exp.BlockCompressionProperty, 2667 always=always, 2668 manual=manual, 2669 never=never, 2670 default=default, 2671 autotemp=autotemp, 2672 ) 2673 2674 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2675 index = self._index 2676 no = self._match_text_seq("NO") 2677 concurrent = self._match_text_seq("CONCURRENT") 2678 2679 if not self._match_text_seq("ISOLATED", "LOADING"): 2680 self._retreat(index) 2681 return None 2682 2683 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2684 return self.expression( 2685 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2686 ) 2687 2688 def _parse_locking(self) -> exp.LockingProperty: 2689 if self._match(TokenType.TABLE): 2690 kind = "TABLE" 2691 elif 
self._match(TokenType.VIEW): 2692 kind = "VIEW" 2693 elif self._match(TokenType.ROW): 2694 kind = "ROW" 2695 elif self._match_text_seq("DATABASE"): 2696 kind = "DATABASE" 2697 else: 2698 kind = None 2699 2700 if kind in ("DATABASE", "TABLE", "VIEW"): 2701 this = self._parse_table_parts() 2702 else: 2703 this = None 2704 2705 if self._match(TokenType.FOR): 2706 for_or_in = "FOR" 2707 elif self._match(TokenType.IN): 2708 for_or_in = "IN" 2709 else: 2710 for_or_in = None 2711 2712 if self._match_text_seq("ACCESS"): 2713 lock_type = "ACCESS" 2714 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2715 lock_type = "EXCLUSIVE" 2716 elif self._match_text_seq("SHARE"): 2717 lock_type = "SHARE" 2718 elif self._match_text_seq("READ"): 2719 lock_type = "READ" 2720 elif self._match_text_seq("WRITE"): 2721 lock_type = "WRITE" 2722 elif self._match_text_seq("CHECKSUM"): 2723 lock_type = "CHECKSUM" 2724 else: 2725 lock_type = None 2726 2727 override = self._match_text_seq("OVERRIDE") 2728 2729 return self.expression( 2730 exp.LockingProperty, 2731 this=this, 2732 kind=kind, 2733 for_or_in=for_or_in, 2734 lock_type=lock_type, 2735 override=override, 2736 ) 2737 2738 def _parse_partition_by(self) -> t.List[exp.Expression]: 2739 if self._match(TokenType.PARTITION_BY): 2740 return self._parse_csv(self._parse_disjunction) 2741 return [] 2742 2743 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2744 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2745 if self._match_text_seq("MINVALUE"): 2746 return exp.var("MINVALUE") 2747 if self._match_text_seq("MAXVALUE"): 2748 return exp.var("MAXVALUE") 2749 return self._parse_bitwise() 2750 2751 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2752 expression = None 2753 from_expressions = None 2754 to_expressions = None 2755 2756 if self._match(TokenType.IN): 2757 this = self._parse_wrapped_csv(self._parse_bitwise) 2758 elif self._match(TokenType.FROM): 2759 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2760 self._match_text_seq("TO") 2761 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2762 elif self._match_text_seq("WITH", "(", "MODULUS"): 2763 this = self._parse_number() 2764 self._match_text_seq(",", "REMAINDER") 2765 expression = self._parse_number() 2766 self._match_r_paren() 2767 else: 2768 self.raise_error("Failed to parse partition bound spec.") 2769 2770 return self.expression( 2771 exp.PartitionBoundSpec, 2772 this=this, 2773 expression=expression, 2774 from_expressions=from_expressions, 2775 to_expressions=to_expressions, 2776 ) 2777 2778 # https://www.postgresql.org/docs/current/sql-createtable.html 2779 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2780 if not self._match_text_seq("OF"): 2781 self._retreat(self._index - 1) 2782 return None 2783 2784 this = self._parse_table(schema=True) 2785 2786 if self._match(TokenType.DEFAULT): 2787 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2788 elif self._match_text_seq("FOR", "VALUES"): 2789 expression = self._parse_partition_bound_spec() 2790 else: 2791 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2792 2793 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2794 2795 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2796 self._match(TokenType.EQ) 2797 return self.expression( 2798 exp.PartitionedByProperty, 2799 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2800 ) 2801 2802 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2803 if self._match_text_seq("AND", "STATISTICS"): 2804 statistics = True 2805 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2806 statistics = False 2807 else: 2808 statistics = None 2809 2810 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2811 2812 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2813 if self._match_text_seq("SQL"): 2814 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2815 return None 2816 2817 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2818 if self._match_text_seq("SQL", "DATA"): 2819 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2820 return None 2821 2822 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2823 if self._match_text_seq("PRIMARY", "INDEX"): 2824 return exp.NoPrimaryIndexProperty() 2825 if self._match_text_seq("SQL"): 2826 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2827 return None 2828 2829 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2830 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2831 return exp.OnCommitProperty() 2832 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2833 return exp.OnCommitProperty(delete=True) 2834 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2835 2836 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2837 if self._match_text_seq("SQL", "DATA"): 2838 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2839 return None 2840 2841 def _parse_distkey(self) -> exp.DistKeyProperty: 2842 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2843 2844 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2845 table = self._parse_table(schema=True) 2846 2847 options = [] 2848 while self._match_texts(("INCLUDING", "EXCLUDING")): 2849 this = self._prev.text.upper() 2850 2851 id_var = self._parse_id_var() 2852 if not id_var: 2853 return None 2854 2855 options.append( 2856 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2857 ) 2858 2859 return self.expression(exp.LikeProperty, this=table, expressions=options) 2860 2861 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2862 return self.expression( 2863 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2864 ) 2865 2866 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2867 self._match(TokenType.EQ) 2868 return self.expression( 2869 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2870 ) 2871 2872 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2873 self._match_text_seq("WITH", "CONNECTION") 2874 return self.expression( 2875 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2876 ) 2877 2878 def _parse_returns(self) -> exp.ReturnsProperty: 2879 value: t.Optional[exp.Expression] 2880 null = None 2881 is_table = self._match(TokenType.TABLE) 2882 2883 if is_table: 2884 if self._match(TokenType.LT): 2885 value = self.expression( 2886 exp.Schema, 2887 this="TABLE", 2888 expressions=self._parse_csv(self._parse_struct_types), 2889 ) 2890 if not self._match(TokenType.GT): 2891 self.raise_error("Expecting >") 2892 else: 2893 value = self._parse_schema(exp.var("TABLE")) 2894 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
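
            # RETURNS sketch (public API; the function body is a placeholder):
            # a parsed RETURNS clause should become an exp.ReturnsProperty on
            # the CREATE node, e.g. for a scalar SQL function:
            #
            #     >>> import sqlglot
            #     >>> from sqlglot import exp
            #     >>> fn = sqlglot.parse_one("CREATE FUNCTION f() RETURNS INT RETURN 1")
            #     >>> fn.find(exp.ReturnsProperty) is not None
            #     True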
2895 null = True 2896 value = None 2897 else: 2898 value = self._parse_types() 2899 2900 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2901 2902 def _parse_describe(self) -> exp.Describe: 2903 kind = self._match_set(self.CREATABLES) and self._prev.text 2904 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2905 if self._match(TokenType.DOT): 2906 style = None 2907 self._retreat(self._index - 2) 2908 2909 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2910 2911 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2912 this = self._parse_statement() 2913 else: 2914 this = self._parse_table(schema=True) 2915 2916 properties = self._parse_properties() 2917 expressions = properties.expressions if properties else None 2918 partition = self._parse_partition() 2919 return self.expression( 2920 exp.Describe, 2921 this=this, 2922 style=style, 2923 kind=kind, 2924 expressions=expressions, 2925 partition=partition, 2926 format=format, 2927 ) 2928 2929 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2930 kind = self._prev.text.upper() 2931 expressions = [] 2932 2933 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2934 if self._match(TokenType.WHEN): 2935 expression = self._parse_disjunction() 2936 self._match(TokenType.THEN) 2937 else: 2938 expression = None 2939 2940 else_ = self._match(TokenType.ELSE) 2941 2942 if not self._match(TokenType.INTO): 2943 return None 2944 2945 return self.expression( 2946 exp.ConditionalInsert, 2947 this=self.expression( 2948 exp.Insert, 2949 this=self._parse_table(schema=True), 2950 expression=self._parse_derived_table_values(), 2951 ), 2952 expression=expression, 2953 else_=else_, 2954 ) 2955 2956 expression = parse_conditional_insert() 2957 while expression is not None: 2958 expressions.append(expression) 2959 expression = parse_conditional_insert() 2960 2961 return self.expression( 2962 exp.MultitableInserts, 2963 kind=kind, 2964 comments=comments, 2965 expressions=expressions, 2966 source=self._parse_table(), 2967 ) 2968 2969 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2970 comments = [] 2971 hint = self._parse_hint() 2972 overwrite = self._match(TokenType.OVERWRITE) 2973 ignore = self._match(TokenType.IGNORE) 2974 local = self._match_text_seq("LOCAL") 2975 alternative = None 2976 is_function = None 2977 2978 if self._match_text_seq("DIRECTORY"): 2979 this: t.Optional[exp.Expression] = self.expression( 2980 exp.Directory, 2981 this=self._parse_var_or_string(), 2982 local=local, 2983 row_format=self._parse_row_format(match_row=True), 2984 ) 2985 else: 2986 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2987 comments += ensure_list(self._prev_comments) 2988 return self._parse_multitable_inserts(comments) 2989 2990 if self._match(TokenType.OR): 2991 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2992 2993 self._match(TokenType.INTO) 2994 comments += ensure_list(self._prev_comments) 2995 self._match(TokenType.TABLE) 2996 is_function = self._match(TokenType.FUNCTION) 2997 2998 this = self._parse_function() if is_function else self._parse_insert_table() 2999 3000 returning = self._parse_returning() # TSQL allows RETURNING before source 3001 3002 return self.expression( 3003 exp.Insert, 3004 comments=comments, 3005 hint=hint, 3006 is_function=is_function, 3007 this=this, 3008 stored=self._match_text_seq("STORED") and 
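
            # INSERT sketch (read="duckdb"; table and column names are
            # placeholders): modifiers matched in the kwargs below, such as
            # BY NAME, land directly on the exp.Insert node:
            #
            #     >>> import sqlglot
            #     >>> ins = sqlglot.parse_one(
            #     ...     "INSERT INTO t BY NAME SELECT 1 AS a", read="duckdb"
            #     ... )
            #     >>> ins.args.get("by_name")
            #     True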
self._parse_stored(), 3009 by_name=self._match_text_seq("BY", "NAME"), 3010 exists=self._parse_exists(), 3011 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3012 and self._parse_disjunction(), 3013 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3014 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3015 default=self._match_text_seq("DEFAULT", "VALUES"), 3016 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3017 conflict=self._parse_on_conflict(), 3018 returning=returning or self._parse_returning(), 3019 overwrite=overwrite, 3020 alternative=alternative, 3021 ignore=ignore, 3022 source=self._match(TokenType.TABLE) and self._parse_table(), 3023 ) 3024 3025 def _parse_insert_table(self) -> t.Optional[exp.Expression]: 3026 this = self._parse_table(schema=True, parse_partition=True) 3027 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3028 this.set("alias", self._parse_table_alias()) 3029 return this 3030 3031 def _parse_kill(self) -> exp.Kill: 3032 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3033 3034 return self.expression( 3035 exp.Kill, 3036 this=self._parse_primary(), 3037 kind=kind, 3038 ) 3039 3040 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 3041 conflict = self._match_text_seq("ON", "CONFLICT") 3042 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3043 3044 if not conflict and not duplicate: 3045 return None 3046 3047 conflict_keys = None 3048 constraint = None 3049 3050 if conflict: 3051 if self._match_text_seq("ON", "CONSTRAINT"): 3052 constraint = self._parse_id_var() 3053 elif self._match(TokenType.L_PAREN): 3054 conflict_keys = self._parse_csv(self._parse_id_var) 3055 self._match_r_paren() 3056 3057 index_predicate = self._parse_where() 3058 3059 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3060 if self._prev.token_type == TokenType.UPDATE: 3061 self._match(TokenType.SET) 3062 expressions = self._parse_csv(self._parse_equality) 3063 else: 3064 expressions = None 3065 3066 return self.expression( 3067 exp.OnConflict, 3068 duplicate=duplicate, 3069 expressions=expressions, 3070 action=action, 3071 conflict_keys=conflict_keys, 3072 index_predicate=index_predicate, 3073 constraint=constraint, 3074 where=self._parse_where(), 3075 ) 3076 3077 def _parse_returning(self) -> t.Optional[exp.Returning]: 3078 if not self._match(TokenType.RETURNING): 3079 return None 3080 return self.expression( 3081 exp.Returning, 3082 expressions=self._parse_csv(self._parse_expression), 3083 into=self._match(TokenType.INTO) and self._parse_table_part(), 3084 ) 3085 3086 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3087 if not self._match(TokenType.FORMAT): 3088 return None 3089 return self._parse_row_format() 3090 3091 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3092 index = self._index 3093 with_ = with_ or self._match_text_seq("WITH") 3094 3095 if not self._match(TokenType.SERDE_PROPERTIES): 3096 self._retreat(index) 3097 return None 3098 return self.expression( 3099 exp.SerdeProperties, 3100 expressions=self._parse_wrapped_properties(), 3101 with_=with_, 3102 ) 3103 3104 def _parse_row_format( 3105 self, match_row: bool = False 3106 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3107 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3108 
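
        # Upsert sketch (read="postgres"; names are placeholders): the
        # ON CONFLICT and RETURNING clauses parsed above attach to exp.Insert:
        #
        #     >>> import sqlglot
        #     >>> ins = sqlglot.parse_one(
        #     ...     "INSERT INTO t (a) VALUES (1) "
        #     ...     "ON CONFLICT (a) DO UPDATE SET a = 2 RETURNING a",
        #     ...     read="postgres",
        #     ... )
        #     >>> ins.args["conflict"] is not None, ins.args["returning"] is not None
        #     (True, True)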
return None 3109 3110 if self._match_text_seq("SERDE"): 3111 this = self._parse_string() 3112 3113 serde_properties = self._parse_serde_properties() 3114 3115 return self.expression( 3116 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3117 ) 3118 3119 self._match_text_seq("DELIMITED") 3120 3121 kwargs = {} 3122 3123 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3124 kwargs["fields"] = self._parse_string() 3125 if self._match_text_seq("ESCAPED", "BY"): 3126 kwargs["escaped"] = self._parse_string() 3127 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3128 kwargs["collection_items"] = self._parse_string() 3129 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3130 kwargs["map_keys"] = self._parse_string() 3131 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3132 kwargs["lines"] = self._parse_string() 3133 if self._match_text_seq("NULL", "DEFINED", "AS"): 3134 kwargs["null"] = self._parse_string() 3135 3136 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3137 3138 def _parse_load(self) -> exp.LoadData | exp.Command: 3139 if self._match_text_seq("DATA"): 3140 local = self._match_text_seq("LOCAL") 3141 self._match_text_seq("INPATH") 3142 inpath = self._parse_string() 3143 overwrite = self._match(TokenType.OVERWRITE) 3144 self._match_pair(TokenType.INTO, TokenType.TABLE) 3145 3146 return self.expression( 3147 exp.LoadData, 3148 this=self._parse_table(schema=True), 3149 local=local, 3150 overwrite=overwrite, 3151 inpath=inpath, 3152 partition=self._parse_partition(), 3153 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3154 serde=self._match_text_seq("SERDE") and self._parse_string(), 3155 ) 3156 return self._parse_as_command(self._prev) 3157 3158 def _parse_delete(self) -> exp.Delete: 3159 # This handles MySQL's "Multiple-Table Syntax" 3160 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3161 tables = None 3162 if not self._match(TokenType.FROM, advance=False): 3163 tables = self._parse_csv(self._parse_table) or None 3164 3165 returning = self._parse_returning() 3166 3167 return self.expression( 3168 exp.Delete, 3169 tables=tables, 3170 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3171 using=self._match(TokenType.USING) 3172 and self._parse_csv(lambda: self._parse_table(joins=True)), 3173 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3174 where=self._parse_where(), 3175 returning=returning or self._parse_returning(), 3176 order=self._parse_order(), 3177 limit=self._parse_limit(), 3178 ) 3179 3180 def _parse_update(self) -> exp.Update: 3181 kwargs: t.Dict[str, t.Any] = { 3182 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3183 } 3184 while self._curr: 3185 if self._match(TokenType.SET): 3186 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3187 elif self._match(TokenType.RETURNING, advance=False): 3188 kwargs["returning"] = self._parse_returning() 3189 elif self._match(TokenType.FROM, advance=False): 3190 from_ = self._parse_from(joins=True) 3191 table = from_.this if from_ else None 3192 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3193 table.set("joins", list(self._parse_joins()) or None) 3194 3195 kwargs["from_"] = from_ 3196 elif self._match(TokenType.WHERE, advance=False): 3197 kwargs["where"] = self._parse_where() 3198 elif self._match(TokenType.ORDER_BY, advance=False): 3199 kwargs["order"] = self._parse_order() 3200 elif 
self._match(TokenType.LIMIT, advance=False): 3201 kwargs["limit"] = self._parse_limit() 3202 else: 3203 break 3204 3205 return self.expression(exp.Update, **kwargs) 3206 3207 def _parse_use(self) -> exp.Use: 3208 return self.expression( 3209 exp.Use, 3210 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3211 this=self._parse_table(schema=False), 3212 ) 3213 3214 def _parse_uncache(self) -> exp.Uncache: 3215 if not self._match(TokenType.TABLE): 3216 self.raise_error("Expecting TABLE after UNCACHE") 3217 3218 return self.expression( 3219 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3220 ) 3221 3222 def _parse_cache(self) -> exp.Cache: 3223 lazy = self._match_text_seq("LAZY") 3224 self._match(TokenType.TABLE) 3225 table = self._parse_table(schema=True) 3226 3227 options = [] 3228 if self._match_text_seq("OPTIONS"): 3229 self._match_l_paren() 3230 k = self._parse_string() 3231 self._match(TokenType.EQ) 3232 v = self._parse_string() 3233 options = [k, v] 3234 self._match_r_paren() 3235 3236 self._match(TokenType.ALIAS) 3237 return self.expression( 3238 exp.Cache, 3239 this=table, 3240 lazy=lazy, 3241 options=options, 3242 expression=self._parse_select(nested=True), 3243 ) 3244 3245 def _parse_partition(self) -> t.Optional[exp.Partition]: 3246 if not self._match_texts(self.PARTITION_KEYWORDS): 3247 return None 3248 3249 return self.expression( 3250 exp.Partition, 3251 subpartition=self._prev.text.upper() == "SUBPARTITION", 3252 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3253 ) 3254 3255 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3256 def _parse_value_expression() -> t.Optional[exp.Expression]: 3257 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3258 return exp.var(self._prev.text.upper()) 3259 return self._parse_expression() 3260 3261 if self._match(TokenType.L_PAREN): 3262 expressions = self._parse_csv(_parse_value_expression) 3263 self._match_r_paren() 3264 return self.expression(exp.Tuple, expressions=expressions) 3265 3266 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3267 expression = self._parse_expression() 3268 if expression: 3269 return self.expression(exp.Tuple, expressions=[expression]) 3270 return None 3271 3272 def _parse_projections(self) -> t.List[exp.Expression]: 3273 return self._parse_expressions() 3274 3275 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3276 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3277 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3278 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3279 ) 3280 elif self._match(TokenType.FROM): 3281 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3282 # Support parentheses for duckdb FROM-first syntax 3283 select = self._parse_select(from_=from_) 3284 if select: 3285 if not select.args.get("from_"): 3286 select.set("from_", from_) 3287 this = select 3288 else: 3289 this = exp.select("*").from_(t.cast(exp.From, from_)) 3290 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3291 else: 3292 this = ( 3293 self._parse_table(consume_pipe=True) 3294 if table 3295 else self._parse_select(nested=True, parse_set_operation=False) 3296 ) 3297 3298 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3299 # in case a modifier (e.g. 
join) is following 3300 if table and isinstance(this, exp.Values) and this.alias: 3301 alias = this.args["alias"].pop() 3302 this = exp.Table(this=this, alias=alias) 3303 3304 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3305 3306 return this 3307 3308 def _parse_select( 3309 self, 3310 nested: bool = False, 3311 table: bool = False, 3312 parse_subquery_alias: bool = True, 3313 parse_set_operation: bool = True, 3314 consume_pipe: bool = True, 3315 from_: t.Optional[exp.From] = None, 3316 ) -> t.Optional[exp.Expression]: 3317 query = self._parse_select_query( 3318 nested=nested, 3319 table=table, 3320 parse_subquery_alias=parse_subquery_alias, 3321 parse_set_operation=parse_set_operation, 3322 ) 3323 3324 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3325 if not query and from_: 3326 query = exp.select("*").from_(from_) 3327 if isinstance(query, exp.Query): 3328 query = self._parse_pipe_syntax_query(query) 3329 query = query.subquery(copy=False) if query and table else query 3330 3331 return query 3332 3333 def _parse_select_query( 3334 self, 3335 nested: bool = False, 3336 table: bool = False, 3337 parse_subquery_alias: bool = True, 3338 parse_set_operation: bool = True, 3339 ) -> t.Optional[exp.Expression]: 3340 cte = self._parse_with() 3341 3342 if cte: 3343 this = self._parse_statement() 3344 3345 if not this: 3346 self.raise_error("Failed to parse any statement following CTE") 3347 return cte 3348 3349 while isinstance(this, exp.Subquery) and this.is_wrapper: 3350 this = this.this 3351 3352 if "with_" in this.arg_types: 3353 this.set("with_", cte) 3354 else: 3355 self.raise_error(f"{this.key} does not support CTE") 3356 this = cte 3357 3358 return this 3359 3360 # duckdb supports leading with FROM x 3361 from_ = ( 3362 self._parse_from(joins=True, consume_pipe=True) 3363 if self._match(TokenType.FROM, advance=False) 3364 else None 3365 ) 3366 3367 if self._match(TokenType.SELECT): 3368 comments = self._prev_comments 3369 3370 hint = self._parse_hint() 3371 3372 if self._next and not self._next.token_type == TokenType.DOT: 3373 all_ = self._match(TokenType.ALL) 3374 distinct = self._match_set(self.DISTINCT_TOKENS) 3375 else: 3376 all_, distinct = None, None 3377 3378 kind = ( 3379 self._match(TokenType.ALIAS) 3380 and self._match_texts(("STRUCT", "VALUE")) 3381 and self._prev.text.upper() 3382 ) 3383 3384 if distinct: 3385 distinct = self.expression( 3386 exp.Distinct, 3387 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3388 ) 3389 3390 if all_ and distinct: 3391 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3392 3393 operation_modifiers = [] 3394 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3395 operation_modifiers.append(exp.var(self._prev.text.upper())) 3396 3397 limit = self._parse_limit(top=True) 3398 projections = self._parse_projections() 3399 3400 this = self.expression( 3401 exp.Select, 3402 kind=kind, 3403 hint=hint, 3404 distinct=distinct, 3405 expressions=projections, 3406 limit=limit, 3407 operation_modifiers=operation_modifiers or None, 3408 ) 3409 this.comments = comments 3410 3411 into = self._parse_into() 3412 if into: 3413 this.set("into", into) 3414 3415 if not from_: 3416 from_ = self._parse_from() 3417 3418 if from_: 3419 this.set("from_", from_) 3420 3421 this = self._parse_query_modifiers(this) 3422 elif (table or nested) and self._match(TokenType.L_PAREN): 3423 this = self._parse_wrapped_select(table=table) 3424 3425 # We return early here so that 

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        key_expressions = (
            self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
        )

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            key_expressions=key_expressions,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
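
    # Example (illustrative sketch): a WITH clause parsed by _parse_with/_parse_cte
    # ends up attached to the query, and Query.ctes exposes the CTE list.
    # Assumes only the public sqlglot API:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("WITH t AS (SELECT 1 AS x) SELECT x FROM t")
    #   [cte.alias for cte in ast.ctes]
    #   # ['t']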

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
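
    # Example (illustrative sketch): on dialects with SUPPORTS_IMPLICIT_UNNEST
    # (e.g. BigQuery), a correlated comma join against a column is rewritten into
    # an explicit UNNEST by the method above. Assumes only the public sqlglot API;
    # the exact generated SQL may vary by version:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT * FROM t, t.arr", read="bigquery").sql("bigquery")
    #   # e.g. 'SELECT * FROM t, UNNEST(t.arr)'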

    @t.overload
    def _parse_query_modifiers(self, this: E) -> E: ...

    @t.overload
    def _parse_query_modifiers(self, this: None) -> None: ...

    def _parse_query_modifiers(self, this):
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                    continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this
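
    # Example (illustrative sketch): the "limit" branch above splits MySQL's
    # LIMIT <offset>, <count> into separate Limit and Offset nodes. Assumes only
    # the public sqlglot API:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT a FROM t LIMIT 10, 5", read="mysql")
    #   ast.args["limit"].expression.sql(), ast.args["offset"].expression.sql()
    #   # ('5', '10')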

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
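
    # Example (illustrative sketch): MATCH_RECOGNIZE is parsed into an
    # exp.MatchRecognize node attached to the query. Assumes only the public
    # sqlglot API; dialect support may vary:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = """
    #       SELECT * FROM t MATCH_RECOGNIZE (
    #           PARTITION BY a ORDER BY b
    #           PATTERN (x+)
    #           DEFINE x AS b > 0
    #       ) AS mr
    #   """
    #   sqlglot.parse_one(sql).find(exp.MatchRecognize) is not None
    #   # True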

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_stream(self) -> t.Optional[exp.Stream]:
        index = self._index
        if self._match_text_seq("STREAM"):
            this = self._try_parse(self._parse_table)
            if this:
                return self.expression(exp.Stream, this=this)

        self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        return self.expression(exp.Join, comments=comments, **kwargs)
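
    # Example (illustrative sketch): join operands and conditions end up as args
    # on exp.Join. Assumes only the public sqlglot API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   join = sqlglot.parse_one("SELECT * FROM a LEFT JOIN b USING (id)").find(exp.Join)
    #   join.side, [i.name for i in join.args["using"]]
    #   # ('LEFT', ['id'])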

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
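
    # Example (illustrative sketch): dotted names are split into catalog/db/table
    # parts on exp.Table. Assumes only the public sqlglot API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   tbl = sqlglot.parse_one("SELECT * FROM c.d.t").find(exp.Table)
    #   tbl.catalog, tbl.db, tbl.name
    #   # ('c', 'd', 't')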

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        stream = self._parse_stream()
        if stream:
            return stream

        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed", self._parse_table_parts())
        elif self._match_text_seq("NOT", "INDEXED"):
            this.set("indexed", False)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)
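
    # Example (illustrative sketch): Snowflake-style time travel is captured by
    # _parse_historical_data below and stored under the table's "when" arg.
    # Assumes only the public sqlglot API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT * FROM t AT (TIMESTAMP => '2024-01-01')"
    #   tbl = sqlglot.parse_one(sql, read="snowflake").find(exp.Table)
    #   tbl.args["when"] is not None
    #   # True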

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
            return None

        self._advance()

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_column())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.ANY):
                exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
            else:
                exprs = self._parse_csv(_parse_aliased_expression)
            self._match_r_paren()
            return self.expression(exp.In, this=value, expressions=exprs)

        return self.expression(exp.In, this=value, field=self._parse_id_var())

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns,
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot
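
    # Example (illustrative sketch): for a non-unpivot PIVOT, the loop above
    # precomputes the generated column names as the product of the IN values and
    # the aggregation aliases. Assumes only the public sqlglot API; exact names
    # may vary with dialect settings such as PREFIXED_PIVOT_COLUMNS:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   sql = "SELECT * FROM t PIVOT (SUM(v) AS total FOR y IN (2000, 2010))"
    #   pivot = sqlglot.parse_one(sql, read="spark").find(exp.Pivot)
    #   [c.name for c in pivot.args["columns"]]
    #   # e.g. ['2000_total', '2010_total']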

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_disjunction()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
                key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
                elements[key].append(cube_or_rollup)
            elif grouping_sets := self._parse_grouping_sets():
                elements["grouping_sets"].append(grouping_sets)
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]:
        if self._match(TokenType.CUBE):
            kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube
        elif self._match(TokenType.ROLLUP):
            kind = exp.Rollup
        else:
            return None

        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)
        )

    def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]:
        if self._match(TokenType.GROUPING_SETS):
            return self.expression(
                exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set)
            )
        return None

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_disjunction())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_disjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_disjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_disjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_disjunction()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # desc is tri-state: True if DESC matched, False if ASC was explicit, else None
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                to=self._match_text_seq("TO") and self._parse_bitwise(),
                step=self._match_text_seq("STEP") and self._parse_bitwise(),
                interpolate=self._parse_interpolate(),
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )
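
    # Example (illustrative sketch): explicit direction and NULLS ordering land
    # on exp.Ordered. Assumes only the public sqlglot API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ordered = sqlglot.parse_one("SELECT a FROM t ORDER BY a DESC NULLS LAST").find(exp.Ordered)
    #   ordered.args["desc"], ordered.args["nulls_first"]
    #   # (True, False)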

    def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]:
        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if not (percent or rows or with_ties):
            return None

        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

            else:
                # Parsing LIMIT x% (i.e. x PERCENT) as a term leads to an error, since
                # we try to build an exp.Mod expr. For that matter, we backtrack and instead
                # consume the factor plus parse the percentage separately
                index = self._index
                expression = self._try_parse(self._parse_term)
                if isinstance(expression, exp.Mod):
                    self._retreat(index)
                    expression = self._parse_factor()
                elif not expression:
                    expression = self._parse_factor()
            limit_options = self._parse_limit_options()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this
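
    # Example (illustrative sketch): set operations become exp.Union/Except/Intersect
    # nodes, with ALL/DISTINCT recorded on the "distinct" arg. Assumes only the
    # public sqlglot API:
    #
    #   import sqlglot
    #   from sqlglot import exp
    #   ast = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")
    #   isinstance(ast, exp.Union), ast.args["distinct"]
    #   # (True, False)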

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON,
                this=kind,
                with_=_with,
                unique=unique,
            )
        else:
            expression = self._parse_null() or self._parse_bitwise()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        this = self.expression(exp.Not, this=this) if negate else this
        return self._parse_column_ops(this)

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query):
                this = self.expression(
                    exp.In,
                    this=this,
                    query=self._parse_query_modifiers(query).subquery(copy=False),
                )
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(
            exp.Escape, this=this, expression=self._parse_string() or self._parse_null()
        )
self.expression( 5193 exp.Between, 5194 this=this, 5195 low=low, 5196 high=high, 5197 symmetric=symmetric, 5198 ) 5199 5200 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5201 if not self._match(TokenType.ESCAPE): 5202 return this 5203 return self.expression( 5204 exp.Escape, this=this, expression=self._parse_string() or self._parse_null() 5205 ) 5206 5207 def _parse_interval_span(self, this: exp.Expression) -> exp.Interval: 5208 # handle day-time format interval span with omitted units: 5209 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5210 interval_span_units_omitted = None 5211 if ( 5212 this 5213 and this.is_string 5214 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5215 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5216 ): 5217 index = self._index 5218 5219 # Var "TO" Var 5220 first_unit = self._parse_var(any_token=True, upper=True) 5221 second_unit = None 5222 if first_unit and self._match_text_seq("TO"): 5223 second_unit = self._parse_var(any_token=True, upper=True) 5224 5225 interval_span_units_omitted = not (first_unit and second_unit) 5226 5227 self._retreat(index) 5228 5229 unit = ( 5230 None 5231 if interval_span_units_omitted 5232 else ( 5233 self._parse_function() 5234 or ( 5235 not self._match(TokenType.ALIAS, advance=False) 5236 and self._parse_var(any_token=True, upper=True) 5237 ) 5238 ) 5239 ) 5240 5241 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5242 # each INTERVAL expression into this canonical form so it's easy to transpile 5243 if this and this.is_number: 5244 this = exp.Literal.string(this.to_py()) 5245 elif this and this.is_string: 5246 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5247 if parts and unit: 5248 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5249 unit = None 5250 self._retreat(self._index - 1) 5251 5252 if len(parts) == 1: 5253 this = exp.Literal.string(parts[0][0]) 5254 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5255 5256 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5257 unit = self.expression( 5258 exp.IntervalSpan, 5259 this=unit, 5260 expression=self._parse_function() or self._parse_var(any_token=True, upper=True), 5261 ) 5262 5263 return self.expression(exp.Interval, this=this, unit=unit) 5264 5265 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5266 index = self._index 5267 5268 if not self._match(TokenType.INTERVAL) and match_interval: 5269 return None 5270 5271 if self._match(TokenType.STRING, advance=False): 5272 this = self._parse_primary() 5273 else: 5274 this = self._parse_term() 5275 5276 if not this or ( 5277 isinstance(this, exp.Column) 5278 and not this.table 5279 and not this.this.quoted 5280 and self._curr 5281 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5282 ): 5283 self._retreat(index) 5284 return None 5285 5286 interval = self._parse_interval_span(this) 5287 5288 index = self._index 5289 self._match(TokenType.PLUS) 5290 5291 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5292 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5293 return self.expression( 5294 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5295 ) 5296 5297 self._retreat(index) 5298 return interval 5299 5300 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5301 this = self._parse_term() 5302 5303 while True: 5304 if self._match_set(self.BITWISE): 5305 this = self.expression( 5306 self.BITWISE[self._prev.token_type], 5307 this=this, 5308 expression=self._parse_term(), 5309 ) 5310 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5311 this = self.expression( 5312 exp.DPipe, 5313 this=this, 5314 expression=self._parse_term(), 5315 safe=not self.dialect.STRICT_STRING_CONCAT, 5316 ) 5317 elif self._match(TokenType.DQMARK): 5318 this = self.expression( 5319 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5320 ) 5321 elif self._match_pair(TokenType.LT, TokenType.LT): 5322 this = self.expression( 5323 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5324 ) 5325 elif self._match_pair(TokenType.GT, TokenType.GT): 5326 this = self.expression( 5327 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5328 ) 5329 else: 5330 break 5331 5332 return this 5333 5334 def _parse_term(self) -> t.Optional[exp.Expression]: 5335 this = self._parse_factor() 5336 5337 while self._match_set(self.TERM): 5338 klass = self.TERM[self._prev.token_type] 5339 comments = self._prev_comments 5340 expression = self._parse_factor() 5341 5342 this = self.expression(klass, this=this, comments=comments, expression=expression) 5343 5344 if isinstance(this, exp.Collate): 5345 expr = this.expression 5346 5347 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5348 # fall back to Identifier / Var 5349 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5350 ident = expr.this 5351 if isinstance(ident, exp.Identifier): 5352 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5353 5354 return this 5355 5356 def _parse_factor(self) -> t.Optional[exp.Expression]: 5357 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5358 this = self._parse_at_time_zone(parse_method()) 5359 5360 while self._match_set(self.FACTOR): 5361 klass = self.FACTOR[self._prev.token_type] 5362 comments = self._prev_comments 5363 expression = parse_method() 5364 5365 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5366 self._retreat(self._index - 1) 5367 return this 5368 5369 this = self.expression(klass, this=this, comments=comments, expression=expression) 5370 5371 if isinstance(this, exp.Div): 5372 this.set("typed", self.dialect.TYPED_DIVISION) 5373 this.set("safe", self.dialect.SAFE_DIVISION) 5374 5375 return this 5376 5377 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5378 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5379 5380 def _parse_unary(self) -> t.Optional[exp.Expression]: 5381 if self._match_set(self.UNARY_PARSERS): 5382 return self.UNARY_PARSERS[self._prev.token_type](self) 5383 return self._parse_type() 5384 5385 def _parse_type( 5386 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5387 ) -> t.Optional[exp.Expression]: 5388 interval = parse_interval and self._parse_interval() 5389 if interval: 5390 return self._parse_column_ops(interval) 5391 5392 index = self._index 5393 data_type = self._parse_types(check_func=True, 
allow_identifiers=False) 5394 5395 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5396 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5397 if isinstance(data_type, exp.Cast): 5398 # This constructor can contain ops directly after it, for instance struct unnesting: 5399 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5400 return self._parse_column_ops(data_type) 5401 5402 if data_type: 5403 index2 = self._index 5404 this = self._parse_primary() 5405 5406 if isinstance(this, exp.Literal): 5407 literal = this.name 5408 this = self._parse_column_ops(this) 5409 5410 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5411 if parser: 5412 return parser(self, this, data_type) 5413 5414 if ( 5415 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5416 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5417 and TIME_ZONE_RE.search(literal) 5418 ): 5419 data_type = exp.DataType.build("TIMESTAMPTZ") 5420 5421 return self.expression(exp.Cast, this=this, to=data_type) 5422 5423 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5424 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5425 # 5426 # If the index difference here is greater than 1, that means the parser itself must have 5427 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5428 # 5429 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5430 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5431 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5432 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5433 # 5434 # In these cases, we don't really want to return the converted type, but instead retreat 5435 # and try to parse a Column or Identifier in the section below. 
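# For example, if the input contains DECIMAL(38, 0), the scale and precision
# tokens really were consumed, so the index difference is greater than 1 and
# the parenthesized type is kept as written; for a bare DECIMAL expanded by a
# TYPE_CONVERTERS callable, only the keyword was consumed, so we retreat and
# re-parse it as a Column or Identifier instead.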
5436 if data_type.expressions and index2 - index > 1: 5437 self._retreat(index2) 5438 return self._parse_column_ops(data_type) 5439 5440 self._retreat(index) 5441 5442 if fallback_to_identifier: 5443 return self._parse_id_var() 5444 5445 this = self._parse_column() 5446 return this and self._parse_column_ops(this) 5447 5448 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5449 this = self._parse_type() 5450 if not this: 5451 return None 5452 5453 if isinstance(this, exp.Column) and not this.table: 5454 this = exp.var(this.name.upper()) 5455 5456 return self.expression( 5457 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5458 ) 5459 5460 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5461 type_name = identifier.name 5462 5463 while self._match(TokenType.DOT): 5464 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5465 5466 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5467 5468 def _parse_types( 5469 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5470 ) -> t.Optional[exp.Expression]: 5471 index = self._index 5472 5473 this: t.Optional[exp.Expression] = None 5474 prefix = self._match_text_seq("SYSUDTLIB", ".") 5475 5476 if self._match_set(self.TYPE_TOKENS): 5477 type_token = self._prev.token_type 5478 else: 5479 type_token = None 5480 identifier = allow_identifiers and self._parse_id_var( 5481 any_token=False, tokens=(TokenType.VAR,) 5482 ) 5483 if isinstance(identifier, exp.Identifier): 5484 try: 5485 tokens = self.dialect.tokenize(identifier.name) 5486 except TokenError: 5487 tokens = None 5488 5489 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5490 type_token = tokens[0].token_type 5491 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5492 this = self._parse_user_defined_type(identifier) 5493 else: 5494 self._retreat(self._index - 1) 5495 return None 5496 else: 5497 return None 5498 5499 if type_token == TokenType.PSEUDO_TYPE: 5500 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5501 5502 if type_token == TokenType.OBJECT_IDENTIFIER: 5503 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5504 5505 # https://materialize.com/docs/sql/types/map/ 5506 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5507 key_type = self._parse_types( 5508 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5509 ) 5510 if not self._match(TokenType.FARROW): 5511 self._retreat(index) 5512 return None 5513 5514 value_type = self._parse_types( 5515 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5516 ) 5517 if not self._match(TokenType.R_BRACKET): 5518 self._retreat(index) 5519 return None 5520 5521 return exp.DataType( 5522 this=exp.DataType.Type.MAP, 5523 expressions=[key_type, value_type], 5524 nested=True, 5525 prefix=prefix, 5526 ) 5527 5528 nested = type_token in self.NESTED_TYPE_TOKENS 5529 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5530 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5531 expressions = None 5532 maybe_func = False 5533 5534 if self._match(TokenType.L_PAREN): 5535 if is_struct: 5536 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5537 elif nested: 5538 expressions = self._parse_csv( 5539 lambda: self._parse_types( 5540 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5541 ) 5542 ) 5543 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5544 this = expressions[0] 5545 this.set("nullable", True) 5546 self._match_r_paren() 5547 return this 5548 elif type_token in self.ENUM_TYPE_TOKENS: 5549 expressions = self._parse_csv(self._parse_equality) 5550 elif is_aggregate: 5551 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5552 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5553 ) 5554 if not func_or_ident: 5555 return None 5556 expressions = [func_or_ident] 5557 if self._match(TokenType.COMMA): 5558 expressions.extend( 5559 self._parse_csv( 5560 lambda: self._parse_types( 5561 check_func=check_func, 5562 schema=schema, 5563 allow_identifiers=allow_identifiers, 5564 ) 5565 ) 5566 ) 5567 else: 5568 expressions = self._parse_csv(self._parse_type_size) 5569 5570 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5571 if type_token == TokenType.VECTOR and len(expressions) == 2: 5572 expressions = self._parse_vector_expressions(expressions) 5573 5574 if not self._match(TokenType.R_PAREN): 5575 self._retreat(index) 5576 return None 5577 5578 maybe_func = True 5579 5580 values: t.Optional[t.List[exp.Expression]] = None 5581 5582 if nested and self._match(TokenType.LT): 5583 if is_struct: 5584 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5585 else: 5586 expressions = self._parse_csv( 5587 lambda: self._parse_types( 5588 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5589 ) 5590 ) 5591 5592 if not self._match(TokenType.GT): 5593 self.raise_error("Expecting >") 5594 5595 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5596 values = self._parse_csv(self._parse_disjunction) 5597 if not values and is_struct: 5598 values = None 5599 self._retreat(self._index - 1) 5600 else: 5601 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5602 5603 if type_token in self.TIMESTAMPS: 5604 if self._match_text_seq("WITH", "TIME", "ZONE"): 5605 maybe_func = False 5606 tz_type = ( 5607 exp.DataType.Type.TIMETZ 5608 if type_token in self.TIMES 5609 else exp.DataType.Type.TIMESTAMPTZ 5610 ) 5611 this = exp.DataType(this=tz_type, expressions=expressions) 5612 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5613 maybe_func = False 5614 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5615 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5616 maybe_func = False 5617 elif type_token == TokenType.INTERVAL: 5618 if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 5619 unit = self._parse_var(upper=True) 5620 if self._match_text_seq("TO"): 5621 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5622 5623 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5624 else: 5625 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5626 elif type_token == TokenType.VOID: 5627 this = exp.DataType(this=exp.DataType.Type.NULL) 5628 5629 if maybe_func and check_func: 5630 index2 = self._index 5631 peek = self._parse_string() 5632 5633 if not peek: 5634 self._retreat(index) 5635 return None 5636 5637 self._retreat(index2) 5638 5639 if not this: 5640 if self._match_text_seq("UNSIGNED"): 5641 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5642 if not unsigned_type_token: 5643 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5644 5645 type_token = unsigned_type_token or type_token 5646 5647 # NULLABLE without 
parentheses can be a column (Presto/Trino) 5648 if type_token == TokenType.NULLABLE and not expressions: 5649 self._retreat(index) 5650 return None 5651 5652 this = exp.DataType( 5653 this=exp.DataType.Type[type_token.value], 5654 expressions=expressions, 5655 nested=nested, 5656 prefix=prefix, 5657 ) 5658 5659 # Empty arrays/structs are allowed 5660 if values is not None: 5661 cls = exp.Struct if is_struct else exp.Array 5662 this = exp.cast(cls(expressions=values), this, copy=False) 5663 5664 elif expressions: 5665 this.set("expressions", expressions) 5666 5667 # https://materialize.com/docs/sql/types/list/#type-name 5668 while self._match(TokenType.LIST): 5669 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5670 5671 index = self._index 5672 5673 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5674 matched_array = self._match(TokenType.ARRAY) 5675 5676 while self._curr: 5677 datatype_token = self._prev.token_type 5678 matched_l_bracket = self._match(TokenType.L_BRACKET) 5679 5680 if (not matched_l_bracket and not matched_array) or ( 5681 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5682 ): 5683 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5684 # not to be confused with the fixed size array parsing 5685 break 5686 5687 matched_array = False 5688 values = self._parse_csv(self._parse_disjunction) or None 5689 if ( 5690 values 5691 and not schema 5692 and ( 5693 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 5694 or datatype_token == TokenType.ARRAY 5695 or not self._match(TokenType.R_BRACKET, advance=False) 5696 ) 5697 ): 5698 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5699 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5700 self._retreat(index) 5701 break 5702 5703 this = exp.DataType( 5704 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5705 ) 5706 self._match(TokenType.R_BRACKET) 5707 5708 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5709 converter = self.TYPE_CONVERTERS.get(this.this) 5710 if converter: 5711 this = converter(t.cast(exp.DataType, this)) 5712 5713 return this 5714 5715 def _parse_vector_expressions( 5716 self, expressions: t.List[exp.Expression] 5717 ) -> t.List[exp.Expression]: 5718 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5719 5720 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5721 index = self._index 5722 5723 if ( 5724 self._curr 5725 and self._next 5726 and self._curr.token_type in self.TYPE_TOKENS 5727 and self._next.token_type in self.TYPE_TOKENS 5728 ): 5729 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5730 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5731 this = self._parse_id_var() 5732 else: 5733 this = ( 5734 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5735 or self._parse_id_var() 5736 ) 5737 5738 self._match(TokenType.COLON) 5739 5740 if ( 5741 type_required 5742 and not isinstance(this, exp.DataType) 5743 and not self._match_set(self.TYPE_TOKENS, advance=False) 5744 ): 5745 self._retreat(index) 5746 return self._parse_types() 5747 5748 return self._parse_column_def(this) 5749 5750 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5751 if not self._match_text_seq("AT", "TIME", "ZONE"): 5752 return this 5753 return self._parse_at_time_zone( 5754 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5755 ) 5756 5757 def _parse_column(self) -> t.Optional[exp.Expression]: 5758 this = self._parse_column_reference() 5759 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5760 5761 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5762 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5763 5764 return column 5765 5766 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5767 this = self._parse_field() 5768 if ( 5769 not this 5770 and self._match(TokenType.VALUES, advance=False) 5771 and self.VALUES_FOLLOWED_BY_PAREN 5772 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5773 ): 5774 this = self._parse_id_var() 5775 5776 if isinstance(this, exp.Identifier): 5777 # We bubble up comments from the Identifier to the Column 5778 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5779 5780 return this 5781 5782 def _parse_colon_as_variant_extract( 5783 self, this: t.Optional[exp.Expression] 5784 ) -> t.Optional[exp.Expression]: 5785 casts = [] 5786 json_path = [] 5787 escape = None 5788 5789 while self._match(TokenType.COLON): 5790 start_index = self._index 5791 5792 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5793 path = self._parse_column_ops( 5794 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5795 ) 5796 5797 # The cast :: operator has a lower precedence than the extraction operator :, so 5798 # we rearrange the AST appropriately to avoid casting the JSON path 5799 while isinstance(path, exp.Cast): 5800 casts.append(path.to) 5801 path = path.this 5802 5803 if casts: 5804 dcolon_offset = next( 5805 i 5806 for i, t in enumerate(self._tokens[start_index:]) 5807 if t.token_type == TokenType.DCOLON 5808 ) 5809 end_token = self._tokens[start_index + dcolon_offset - 1] 5810 else: 5811 end_token = self._prev 5812 5813 if path: 5814 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5815 # it'll roundtrip to a string literal in GET_PATH 5816 if isinstance(path, exp.Identifier) and path.quoted: 5817 escape = True 5818 5819 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5820 5821 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5822 # Databricks transforms it back to the colon/dot notation 5823 if json_path: 5824 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5825 5826 if json_path_expr: 5827 json_path_expr.set("escape", escape) 5828 5829 this = self.expression( 5830 exp.JSONExtract, 5831 this=this, 5832 expression=json_path_expr, 5833 variant_extract=True, 5834 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5835 ) 5836 5837 while casts: 5838 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5839 5840 return this 5841 5842 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5843 return self._parse_types() 5844 5845 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5846 this = self._parse_bracket(this) 5847 5848 while self._match_set(self.COLUMN_OPERATORS): 5849 op_token = self._prev.token_type 5850 op = self.COLUMN_OPERATORS.get(op_token) 5851 5852 if op_token in self.CAST_COLUMN_OPERATORS: 5853 field = self._parse_dcolon() 5854 if not field: 5855 self.raise_error("Expected type") 5856 elif op and self._curr: 5857 field = self._parse_column_reference() or self._parse_bitwise() 5858 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5859 field = self._parse_column_ops(field) 5860 else: 5861 field = self._parse_field(any_token=True, anonymous_func=True) 5862 5863 # Function calls can be qualified, e.g., x.y.FOO() 5864 # This converts the final AST to a series of Dots leading to the function call 5865 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5866 if isinstance(field, (exp.Func, exp.Window)) and this: 5867 this = this.transform( 5868 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5869 ) 5870 5871 if op: 5872 this = op(self, this, field) 5873 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5874 this = self.expression( 5875 exp.Column, 5876 comments=this.comments, 5877 this=field, 5878 table=this.this, 5879 db=this.args.get("table"), 5880 catalog=this.args.get("db"), 5881 ) 5882 elif isinstance(field, exp.Window): 5883 # Move the exp.Dot's to the window's function 5884 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5885 field.set("this", window_func) 5886 this = field 5887 else: 5888 this = self.expression(exp.Dot, this=this, expression=field) 5889 5890 if field and field.comments: 5891 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5892 5893 this = self._parse_bracket(this) 5894 5895 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5896 5897 def _parse_paren(self) -> t.Optional[exp.Expression]: 5898 if not self._match(TokenType.L_PAREN): 5899 return None 5900 5901 comments = self._prev_comments 5902 query = self._parse_select() 5903 5904 if query: 5905 expressions = [query] 5906 else: 5907 expressions = self._parse_expressions() 5908 5909 this = seq_get(expressions, 0) 5910 5911 if not this and self._match(TokenType.R_PAREN, advance=False): 5912 this = self.expression(exp.Tuple) 5913 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5914 this = 
self._parse_subquery(this=this, parse_alias=False) 5915 elif isinstance(this, (exp.Subquery, exp.Values)): 5916 this = self._parse_subquery( 5917 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5918 parse_alias=False, 5919 ) 5920 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5921 this = self.expression(exp.Tuple, expressions=expressions) 5922 else: 5923 this = self.expression(exp.Paren, this=this) 5924 5925 if this: 5926 this.add_comments(comments) 5927 5928 self._match_r_paren(expression=this) 5929 5930 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5931 return self._parse_window(this) 5932 5933 return this 5934 5935 def _parse_primary(self) -> t.Optional[exp.Expression]: 5936 if self._match_set(self.PRIMARY_PARSERS): 5937 token_type = self._prev.token_type 5938 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5939 5940 if token_type == TokenType.STRING: 5941 expressions = [primary] 5942 while self._match(TokenType.STRING): 5943 expressions.append(exp.Literal.string(self._prev.text)) 5944 5945 if len(expressions) > 1: 5946 return self.expression( 5947 exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE 5948 ) 5949 5950 return primary 5951 5952 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5953 return exp.Literal.number(f"0.{self._prev.text}") 5954 5955 return self._parse_paren() 5956 5957 def _parse_field( 5958 self, 5959 any_token: bool = False, 5960 tokens: t.Optional[t.Collection[TokenType]] = None, 5961 anonymous_func: bool = False, 5962 ) -> t.Optional[exp.Expression]: 5963 if anonymous_func: 5964 field = ( 5965 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5966 or self._parse_primary() 5967 ) 5968 else: 5969 field = self._parse_primary() or self._parse_function( 5970 anonymous=anonymous_func, any_token=any_token 5971 ) 5972 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5973 5974 def _parse_function( 5975 self, 5976 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5977 anonymous: bool = False, 5978 optional_parens: bool = True, 5979 any_token: bool = False, 5980 ) -> t.Optional[exp.Expression]: 5981 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5982 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5983 fn_syntax = False 5984 if ( 5985 self._match(TokenType.L_BRACE, advance=False) 5986 and self._next 5987 and self._next.text.upper() == "FN" 5988 ): 5989 self._advance(2) 5990 fn_syntax = True 5991 5992 func = self._parse_function_call( 5993 functions=functions, 5994 anonymous=anonymous, 5995 optional_parens=optional_parens, 5996 any_token=any_token, 5997 ) 5998 5999 if fn_syntax: 6000 self._match(TokenType.R_BRACE) 6001 6002 return func 6003 6004 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 6005 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6006 6007 def _parse_function_call( 6008 self, 6009 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6010 anonymous: bool = False, 6011 optional_parens: bool = True, 6012 any_token: bool = False, 6013 ) -> t.Optional[exp.Expression]: 6014 if not self._curr: 6015 return None 6016 6017 comments = self._curr.comments 6018 prev = self._prev 6019 token = self._curr 6020 token_type = self._curr.token_type 6021 this = self._curr.text 6022 upper = this.upper() 6023 6024 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6025 if optional_parens and parser and token_type not in 
self.INVALID_FUNC_NAME_TOKENS: 6026 self._advance() 6027 return self._parse_window(parser(self)) 6028 6029 if not self._next or self._next.token_type != TokenType.L_PAREN: 6030 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6031 self._advance() 6032 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 6033 6034 return None 6035 6036 if any_token: 6037 if token_type in self.RESERVED_TOKENS: 6038 return None 6039 elif token_type not in self.FUNC_TOKENS: 6040 return None 6041 6042 self._advance(2) 6043 6044 parser = self.FUNCTION_PARSERS.get(upper) 6045 if parser and not anonymous: 6046 this = parser(self) 6047 else: 6048 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6049 6050 if subquery_predicate: 6051 expr = None 6052 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 6053 expr = self._parse_select() 6054 self._match_r_paren() 6055 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6056 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6057 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6058 self._advance(-1) 6059 expr = self._parse_bitwise() 6060 6061 if expr: 6062 return self.expression(subquery_predicate, comments=comments, this=expr) 6063 6064 if functions is None: 6065 functions = self.FUNCTIONS 6066 6067 function = functions.get(upper) 6068 known_function = function and not anonymous 6069 6070 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6071 args = self._parse_function_args(alias) 6072 6073 post_func_comments = self._curr and self._curr.comments 6074 if known_function and post_func_comments: 6075 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6076 # call we'll construct it as exp.Anonymous, even if it's "known" 6077 if any( 6078 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6079 for comment in post_func_comments 6080 ): 6081 known_function = False 6082 6083 if alias and known_function: 6084 args = self._kv_to_prop_eq(args) 6085 6086 if known_function: 6087 func_builder = t.cast(t.Callable, function) 6088 6089 if "dialect" in func_builder.__code__.co_varnames: 6090 func = func_builder(args, dialect=self.dialect) 6091 else: 6092 func = func_builder(args) 6093 6094 func = self.validate_expression(func, args) 6095 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6096 func.meta["name"] = this 6097 6098 this = func 6099 else: 6100 if token_type == TokenType.IDENTIFIER: 6101 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6102 6103 this = self.expression(exp.Anonymous, this=this, expressions=args) 6104 6105 this = this.update_positions(token) 6106 6107 if isinstance(this, exp.Expression): 6108 this.add_comments(comments) 6109 6110 self._match_r_paren(this) 6111 return self._parse_window(this) 6112 6113 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6114 return expression 6115 6116 def _kv_to_prop_eq( 6117 self, expressions: t.List[exp.Expression], parse_map: bool = False 6118 ) -> t.List[exp.Expression]: 6119 transformed = [] 6120 6121 for index, e in enumerate(expressions): 6122 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6123 if isinstance(e, exp.Alias): 6124 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6125 6126 if not isinstance(e, exp.PropertyEQ): 6127 e = self.expression( 6128 exp.PropertyEQ, 6129 this=e.this if parse_map else exp.to_identifier(e.this.name), 6130 expression=e.expression, 6131 ) 6132 6133 
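# e.g. an argument parsed as exp.Alias (1 AS a) and one parsed directly as
# exp.PropertyEQ (a := 1) both leave this branch as PropertyEQ nodes, giving
# function builders keyword-style arguments in one canonical shape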
if isinstance(e.this, exp.Column): 6134 e.this.replace(e.this.this) 6135 else: 6136 e = self._to_prop_eq(e, index) 6137 6138 transformed.append(e) 6139 6140 return transformed 6141 6142 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6143 return self._parse_statement() 6144 6145 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6146 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6147 6148 def _parse_user_defined_function( 6149 self, kind: t.Optional[TokenType] = None 6150 ) -> t.Optional[exp.Expression]: 6151 this = self._parse_table_parts(schema=True) 6152 6153 if not self._match(TokenType.L_PAREN): 6154 return this 6155 6156 expressions = self._parse_csv(self._parse_function_parameter) 6157 self._match_r_paren() 6158 return self.expression( 6159 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6160 ) 6161 6162 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6163 literal = self._parse_primary() 6164 if literal: 6165 return self.expression(exp.Introducer, token=token, expression=literal) 6166 6167 return self._identifier_expression(token) 6168 6169 def _parse_session_parameter(self) -> exp.SessionParameter: 6170 kind = None 6171 this = self._parse_id_var() or self._parse_primary() 6172 6173 if this and self._match(TokenType.DOT): 6174 kind = this.name 6175 this = self._parse_var() or self._parse_primary() 6176 6177 return self.expression(exp.SessionParameter, this=this, kind=kind) 6178 6179 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6180 return self._parse_id_var() 6181 6182 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6183 index = self._index 6184 6185 if self._match(TokenType.L_PAREN): 6186 expressions = t.cast( 6187 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6188 ) 6189 6190 if not self._match(TokenType.R_PAREN): 6191 self._retreat(index) 6192 else: 6193 expressions = [self._parse_lambda_arg()] 6194 6195 if self._match_set(self.LAMBDAS): 6196 return self.LAMBDAS[self._prev.token_type](self, expressions) 6197 6198 self._retreat(index) 6199 6200 this: t.Optional[exp.Expression] 6201 6202 if self._match(TokenType.DISTINCT): 6203 this = self.expression( 6204 exp.Distinct, expressions=self._parse_csv(self._parse_disjunction) 6205 ) 6206 else: 6207 this = self._parse_select_or_expression(alias=alias) 6208 6209 return self._parse_limit( 6210 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6211 ) 6212 6213 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6214 index = self._index 6215 if not self._match(TokenType.L_PAREN): 6216 return this 6217 6218 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6219 # expr can be of both types 6220 if self._match_set(self.SELECT_START_TOKENS): 6221 self._retreat(index) 6222 return this 6223 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6224 self._match_r_paren() 6225 return self.expression(exp.Schema, this=this, expressions=args) 6226 6227 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6228 return self._parse_column_def(self._parse_field(any_token=True)) 6229 6230 def _parse_column_def( 6231 self, this: t.Optional[exp.Expression], computed_column: bool = True 6232 ) -> t.Optional[exp.Expression]: 6233 # column defs are not really columns, they're identifiers 6234 if isinstance(this, exp.Column): 6235 this = this.this 6236 6237 if not computed_column: 6238 self._match(TokenType.ALIAS) 6239 6240 kind = self._parse_types(schema=True) 6241 6242 if self._match_text_seq("FOR", "ORDINALITY"): 6243 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6244 6245 constraints: t.List[exp.Expression] = [] 6246 6247 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6248 ("ALIAS", "MATERIALIZED") 6249 ): 6250 persisted = self._prev.text.upper() == "MATERIALIZED" 6251 constraint_kind = exp.ComputedColumnConstraint( 6252 this=self._parse_disjunction(), 6253 persisted=persisted or self._match_text_seq("PERSISTED"), 6254 data_type=exp.Var(this="AUTO") 6255 if self._match_text_seq("AUTO") 6256 else self._parse_types(), 6257 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6258 ) 6259 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6260 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 6261 in_out_constraint = self.expression( 6262 exp.InOutColumnConstraint, 6263 input_=self._match(TokenType.IN), 6264 output=self._match(TokenType.OUT), 6265 ) 6266 constraints.append(in_out_constraint) 6267 kind = self._parse_types() 6268 elif ( 6269 kind 6270 and self._match(TokenType.ALIAS, advance=False) 6271 and ( 6272 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6273 or (self._next and self._next.token_type == TokenType.L_PAREN) 6274 ) 6275 ): 6276 self._advance() 6277 constraints.append( 6278 self.expression( 6279 exp.ColumnConstraint, 6280 kind=exp.ComputedColumnConstraint( 6281 this=self._parse_disjunction(), 6282 persisted=self._match_texts(("STORED", "VIRTUAL")) 6283 and self._prev.text.upper() == "STORED", 6284 ), 6285 ) 6286 ) 6287 6288 while True: 6289 constraint = self._parse_column_constraint() 6290 if not constraint: 6291 break 6292 constraints.append(constraint) 6293 6294 if not kind and not constraints: 6295 return this 6296 6297 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6298 6299 def _parse_auto_increment( 6300 self, 6301 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6302 start = None 6303 increment = None 6304 order = None 6305 6306 if self._match(TokenType.L_PAREN, advance=False): 6307 args = self._parse_wrapped_csv(self._parse_bitwise) 6308 start = seq_get(args, 0) 6309 increment = seq_get(args, 1) 6310 elif self._match_text_seq("START"): 6311 start = self._parse_bitwise() 6312 self._match_text_seq("INCREMENT") 6313 increment = self._parse_bitwise() 6314 if self._match_text_seq("ORDER"): 6315 order = True 6316 elif self._match_text_seq("NOORDER"): 6317 order = False 6318 6319 if start and increment: 6320 return exp.GeneratedAsIdentityColumnConstraint( 6321 start=start, increment=increment, this=False, 
order=order 6322 ) 6323 6324 return exp.AutoIncrementColumnConstraint() 6325 6326 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6327 if not self._match_text_seq("REFRESH"): 6328 self._retreat(self._index - 1) 6329 return None 6330 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6331 6332 def _parse_compress(self) -> exp.CompressColumnConstraint: 6333 if self._match(TokenType.L_PAREN, advance=False): 6334 return self.expression( 6335 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6336 ) 6337 6338 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6339 6340 def _parse_generated_as_identity( 6341 self, 6342 ) -> ( 6343 exp.GeneratedAsIdentityColumnConstraint 6344 | exp.ComputedColumnConstraint 6345 | exp.GeneratedAsRowColumnConstraint 6346 ): 6347 if self._match_text_seq("BY", "DEFAULT"): 6348 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6349 this = self.expression( 6350 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6351 ) 6352 else: 6353 self._match_text_seq("ALWAYS") 6354 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6355 6356 self._match(TokenType.ALIAS) 6357 6358 if self._match_text_seq("ROW"): 6359 start = self._match_text_seq("START") 6360 if not start: 6361 self._match(TokenType.END) 6362 hidden = self._match_text_seq("HIDDEN") 6363 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6364 6365 identity = self._match_text_seq("IDENTITY") 6366 6367 if self._match(TokenType.L_PAREN): 6368 if self._match(TokenType.START_WITH): 6369 this.set("start", self._parse_bitwise()) 6370 if self._match_text_seq("INCREMENT", "BY"): 6371 this.set("increment", self._parse_bitwise()) 6372 if self._match_text_seq("MINVALUE"): 6373 this.set("minvalue", self._parse_bitwise()) 6374 if self._match_text_seq("MAXVALUE"): 6375 this.set("maxvalue", self._parse_bitwise()) 6376 6377 if self._match_text_seq("CYCLE"): 6378 this.set("cycle", True) 6379 elif self._match_text_seq("NO", "CYCLE"): 6380 this.set("cycle", False) 6381 6382 if not identity: 6383 this.set("expression", self._parse_range()) 6384 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6385 args = self._parse_csv(self._parse_bitwise) 6386 this.set("start", seq_get(args, 0)) 6387 this.set("increment", seq_get(args, 1)) 6388 6389 self._match_r_paren() 6390 6391 return this 6392 6393 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6394 self._match_text_seq("LENGTH") 6395 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6396 6397 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6398 if self._match_text_seq("NULL"): 6399 return self.expression(exp.NotNullColumnConstraint) 6400 if self._match_text_seq("CASESPECIFIC"): 6401 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6402 if self._match_text_seq("FOR", "REPLICATION"): 6403 return self.expression(exp.NotForReplicationColumnConstraint) 6404 6405 # Unconsume the `NOT` token 6406 self._retreat(self._index - 1) 6407 return None 6408 6409 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6410 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6411 6412 procedure_option_follows = ( 6413 self._match(TokenType.WITH, advance=False) 6414 and self._next 6415 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6416 ) 6417 6418 if not 
procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6419 return self.expression( 6420 exp.ColumnConstraint, 6421 this=this, 6422 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6423 ) 6424 6425 return this 6426 6427 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6428 if not self._match(TokenType.CONSTRAINT): 6429 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6430 6431 return self.expression( 6432 exp.Constraint, 6433 this=self._parse_id_var(), 6434 expressions=self._parse_unnamed_constraints(), 6435 ) 6436 6437 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6438 constraints = [] 6439 while True: 6440 constraint = self._parse_unnamed_constraint() or self._parse_function() 6441 if not constraint: 6442 break 6443 constraints.append(constraint) 6444 6445 return constraints 6446 6447 def _parse_unnamed_constraint( 6448 self, constraints: t.Optional[t.Collection[str]] = None 6449 ) -> t.Optional[exp.Expression]: 6450 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6451 constraints or self.CONSTRAINT_PARSERS 6452 ): 6453 return None 6454 6455 constraint = self._prev.text.upper() 6456 if constraint not in self.CONSTRAINT_PARSERS: 6457 self.raise_error(f"No parser found for schema constraint {constraint}.") 6458 6459 return self.CONSTRAINT_PARSERS[constraint](self) 6460 6461 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6462 return self._parse_id_var(any_token=False) 6463 6464 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6465 self._match_texts(("KEY", "INDEX")) 6466 return self.expression( 6467 exp.UniqueColumnConstraint, 6468 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6469 this=self._parse_schema(self._parse_unique_key()), 6470 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6471 on_conflict=self._parse_on_conflict(), 6472 options=self._parse_key_constraint_options(), 6473 ) 6474 6475 def _parse_key_constraint_options(self) -> t.List[str]: 6476 options = [] 6477 while True: 6478 if not self._curr: 6479 break 6480 6481 if self._match(TokenType.ON): 6482 action = None 6483 on = self._advance_any() and self._prev.text 6484 6485 if self._match_text_seq("NO", "ACTION"): 6486 action = "NO ACTION" 6487 elif self._match_text_seq("CASCADE"): 6488 action = "CASCADE" 6489 elif self._match_text_seq("RESTRICT"): 6490 action = "RESTRICT" 6491 elif self._match_pair(TokenType.SET, TokenType.NULL): 6492 action = "SET NULL" 6493 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6494 action = "SET DEFAULT" 6495 else: 6496 self.raise_error("Invalid key constraint") 6497 6498 options.append(f"ON {on} {action}") 6499 else: 6500 var = self._parse_var_from_options( 6501 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6502 ) 6503 if not var: 6504 break 6505 options.append(var.name) 6506 6507 return options 6508 6509 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6510 if match and not self._match(TokenType.REFERENCES): 6511 return None 6512 6513 expressions = None 6514 this = self._parse_table(schema=True) 6515 options = self._parse_key_constraint_options() 6516 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6517 6518 def _parse_foreign_key(self) -> exp.ForeignKey: 6519 expressions = ( 6520 self._parse_wrapped_id_vars() 6521 if not self._match(TokenType.REFERENCES, advance=False) 6522 else None 6523 ) 6524 reference = 
self._parse_references() 6525 on_options = {} 6526 6527 while self._match(TokenType.ON): 6528 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6529 self.raise_error("Expected DELETE or UPDATE") 6530 6531 kind = self._prev.text.lower() 6532 6533 if self._match_text_seq("NO", "ACTION"): 6534 action = "NO ACTION" 6535 elif self._match(TokenType.SET): 6536 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6537 action = "SET " + self._prev.text.upper() 6538 else: 6539 self._advance() 6540 action = self._prev.text.upper() 6541 6542 on_options[kind] = action 6543 6544 return self.expression( 6545 exp.ForeignKey, 6546 expressions=expressions, 6547 reference=reference, 6548 options=self._parse_key_constraint_options(), 6549 **on_options, # type: ignore 6550 ) 6551 6552 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6553 return self._parse_field() 6554 6555 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6556 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6557 self._retreat(self._index - 1) 6558 return None 6559 6560 id_vars = self._parse_wrapped_id_vars() 6561 return self.expression( 6562 exp.PeriodForSystemTimeConstraint, 6563 this=seq_get(id_vars, 0), 6564 expression=seq_get(id_vars, 1), 6565 ) 6566 6567 def _parse_primary_key( 6568 self, wrapped_optional: bool = False, in_props: bool = False 6569 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6570 desc = ( 6571 self._match_set((TokenType.ASC, TokenType.DESC)) 6572 and self._prev.token_type == TokenType.DESC 6573 ) 6574 6575 this = None 6576 if ( 6577 self._curr.text.upper() not in self.CONSTRAINT_PARSERS 6578 and self._next 6579 and self._next.token_type == TokenType.L_PAREN 6580 ): 6581 this = self._parse_id_var() 6582 6583 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6584 return self.expression( 6585 exp.PrimaryKeyColumnConstraint, 6586 desc=desc, 6587 options=self._parse_key_constraint_options(), 6588 ) 6589 6590 expressions = self._parse_wrapped_csv( 6591 self._parse_primary_key_part, optional=wrapped_optional 6592 ) 6593 6594 return self.expression( 6595 exp.PrimaryKey, 6596 this=this, 6597 expressions=expressions, 6598 include=self._parse_index_params(), 6599 options=self._parse_key_constraint_options(), 6600 ) 6601 6602 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6603 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 6604 6605 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6606 """ 6607 Parses a datetime column in ODBC format. We parse the column into the corresponding 6608 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6609 same as we did for `DATE('yyyy-mm-dd')`. 
6610 6611 Reference: 6612 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6613 """ 6614 self._match(TokenType.VAR) 6615 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6616 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6617 if not self._match(TokenType.R_BRACE): 6618 self.raise_error("Expected }") 6619 return expression 6620 6621 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6622 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6623 return this 6624 6625 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6626 map_token = seq_get(self._tokens, self._index - 2) 6627 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6628 else: 6629 parse_map = False 6630 6631 bracket_kind = self._prev.token_type 6632 if ( 6633 bracket_kind == TokenType.L_BRACE 6634 and self._curr 6635 and self._curr.token_type == TokenType.VAR 6636 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6637 ): 6638 return self._parse_odbc_datetime_literal() 6639 6640 expressions = self._parse_csv( 6641 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6642 ) 6643 6644 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6645 self.raise_error("Expected ]") 6646 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6647 self.raise_error("Expected }") 6648 6649 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6650 if bracket_kind == TokenType.L_BRACE: 6651 this = self.expression( 6652 exp.Struct, 6653 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6654 ) 6655 elif not this: 6656 this = build_array_constructor( 6657 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6658 ) 6659 else: 6660 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6661 if constructor_type: 6662 return build_array_constructor( 6663 constructor_type, 6664 args=expressions, 6665 bracket_kind=bracket_kind, 6666 dialect=self.dialect, 6667 ) 6668 6669 expressions = apply_index_offset( 6670 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6671 ) 6672 this = self.expression( 6673 exp.Bracket, 6674 this=this, 6675 expressions=expressions, 6676 comments=this.pop_comments(), 6677 ) 6678 6679 self._add_comments(this) 6680 return self._parse_bracket(this) 6681 6682 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6683 if not self._match(TokenType.COLON): 6684 return this 6685 6686 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 6687 self._advance() 6688 end: t.Optional[exp.Expression] = -exp.Literal.number("1") 6689 else: 6690 end = self._parse_unary() 6691 step = self._parse_unary() if self._match(TokenType.COLON) else None 6692 return self.expression(exp.Slice, this=this, expression=end, step=step) 6693 6694 def _parse_case(self) -> t.Optional[exp.Expression]: 6695 if self._match(TokenType.DOT, advance=False): 6696 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6697 self._retreat(self._index - 1) 6698 return None 6699 6700 ifs = [] 6701 default = None 6702 6703 comments = self._prev_comments 6704 expression = self._parse_disjunction() 6705 6706 while self._match(TokenType.WHEN): 6707 this = self._parse_disjunction() 6708 self._match(TokenType.THEN) 6709 then = self._parse_disjunction() 6710 
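# each WHEN <cond> THEN <result> pair is collected as an exp.If node, e.g.
# CASE WHEN x > 0 THEN 'pos' WHEN x < 0 THEN 'neg' END produces two If nodes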
ifs.append(self.expression(exp.If, this=this, true=then)) 6711 6712 if self._match(TokenType.ELSE): 6713 default = self._parse_disjunction() 6714 6715 if not self._match(TokenType.END): 6716 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6717 default = exp.column("interval") 6718 else: 6719 self.raise_error("Expected END after CASE", self._prev) 6720 6721 return self.expression( 6722 exp.Case, comments=comments, this=expression, ifs=ifs, default=default 6723 ) 6724 6725 def _parse_if(self) -> t.Optional[exp.Expression]: 6726 if self._match(TokenType.L_PAREN): 6727 args = self._parse_csv( 6728 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6729 ) 6730 this = self.validate_expression(exp.If.from_arg_list(args), args) 6731 self._match_r_paren() 6732 else: 6733 index = self._index - 1 6734 6735 if self.NO_PAREN_IF_COMMANDS and index == 0: 6736 return self._parse_as_command(self._prev) 6737 6738 condition = self._parse_disjunction() 6739 6740 if not condition: 6741 self._retreat(index) 6742 return None 6743 6744 self._match(TokenType.THEN) 6745 true = self._parse_disjunction() 6746 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 6747 self._match(TokenType.END) 6748 this = self.expression(exp.If, this=condition, true=true, false=false) 6749 6750 return this 6751 6752 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6753 if not self._match_text_seq("VALUE", "FOR"): 6754 self._retreat(self._index - 1) 6755 return None 6756 6757 return self.expression( 6758 exp.NextValueFor, 6759 this=self._parse_column(), 6760 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6761 ) 6762 6763 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6764 this = self._parse_function() or self._parse_var_or_string(upper=True) 6765 6766 if self._match(TokenType.FROM): 6767 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6768 6769 if not self._match(TokenType.COMMA): 6770 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6771 6772 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6773 6774 def _parse_gap_fill(self) -> exp.GapFill: 6775 self._match(TokenType.TABLE) 6776 this = self._parse_table() 6777 6778 self._match(TokenType.COMMA) 6779 args = [this, *self._parse_csv(self._parse_lambda)] 6780 6781 gap_fill = exp.GapFill.from_arg_list(args) 6782 return self.validate_expression(gap_fill, args) 6783 6784 def _parse_char(self) -> exp.Chr: 6785 return self.expression( 6786 exp.Chr, 6787 expressions=self._parse_csv(self._parse_assignment), 6788 charset=self._match(TokenType.USING) and self._parse_var(), 6789 ) 6790 6791 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6792 this = self._parse_disjunction() 6793 6794 if not self._match(TokenType.ALIAS): 6795 if self._match(TokenType.COMMA): 6796 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6797 6798 self.raise_error("Expected AS after CAST") 6799 6800 fmt = None 6801 to = self._parse_types() 6802 6803 default = self._match(TokenType.DEFAULT) 6804 if default: 6805 default = self._parse_bitwise() 6806 self._match_text_seq("ON", "CONVERSION", "ERROR") 6807 6808 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6809 fmt_string = self._parse_string() 6810 fmt = self._parse_at_time_zone(fmt_string) 6811 6812 if not to: 6813 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6814 if to.this in 
exp.DataType.TEMPORAL_TYPES: 6815 this = self.expression( 6816 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6817 this=this, 6818 format=exp.Literal.string( 6819 format_time( 6820 fmt_string.this if fmt_string else "", 6821 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6822 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6823 ) 6824 ), 6825 safe=safe, 6826 ) 6827 6828 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6829 this.set("zone", fmt.args["zone"]) 6830 return this 6831 elif not to: 6832 self.raise_error("Expected TYPE after CAST") 6833 elif isinstance(to, exp.Identifier): 6834 to = exp.DataType.build(to.name, dialect=self.dialect, udt=True) 6835 elif to.this == exp.DataType.Type.CHAR: 6836 if self._match(TokenType.CHARACTER_SET): 6837 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6838 6839 return self.build_cast( 6840 strict=strict, 6841 this=this, 6842 to=to, 6843 format=fmt, 6844 safe=safe, 6845 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6846 default=default, 6847 ) 6848 6849 def _parse_string_agg(self) -> exp.GroupConcat: 6850 if self._match(TokenType.DISTINCT): 6851 args: t.List[t.Optional[exp.Expression]] = [ 6852 self.expression(exp.Distinct, expressions=[self._parse_disjunction()]) 6853 ] 6854 if self._match(TokenType.COMMA): 6855 args.extend(self._parse_csv(self._parse_disjunction)) 6856 else: 6857 args = self._parse_csv(self._parse_disjunction) # type: ignore 6858 6859 if self._match_text_seq("ON", "OVERFLOW"): 6860 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6861 if self._match_text_seq("ERROR"): 6862 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6863 else: 6864 self._match_text_seq("TRUNCATE") 6865 on_overflow = self.expression( 6866 exp.OverflowTruncateBehavior, 6867 this=self._parse_string(), 6868 with_count=( 6869 self._match_text_seq("WITH", "COUNT") 6870 or not self._match_text_seq("WITHOUT", "COUNT") 6871 ), 6872 ) 6873 else: 6874 on_overflow = None 6875 6876 index = self._index 6877 if not self._match(TokenType.R_PAREN) and args: 6878 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6879 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6880 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6881 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6882 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6883 6884 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6885 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6886 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
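# For example, Postgres' STRING_AGG(x, ',' ORDER BY y) and Trino's
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) both yield a GroupConcat whose
# `this` carries the Order node, so either syntax can be regenerated from it.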
6887 if not self._match_text_seq("WITHIN", "GROUP"): 6888 self._retreat(index) 6889 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6890 6891 # The corresponding match_r_paren will be called in parse_function (caller) 6892 self._match_l_paren() 6893 6894 return self.expression( 6895 exp.GroupConcat, 6896 this=self._parse_order(this=seq_get(args, 0)), 6897 separator=seq_get(args, 1), 6898 on_overflow=on_overflow, 6899 ) 6900 6901 def _parse_convert( 6902 self, strict: bool, safe: t.Optional[bool] = None 6903 ) -> t.Optional[exp.Expression]: 6904 this = self._parse_bitwise() 6905 6906 if self._match(TokenType.USING): 6907 to: t.Optional[exp.Expression] = self.expression( 6908 exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY}) 6909 ) 6910 elif self._match(TokenType.COMMA): 6911 to = self._parse_types() 6912 else: 6913 to = None 6914 6915 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6916 6917 def _parse_xml_element(self) -> exp.XMLElement: 6918 if self._match_text_seq("EVALNAME"): 6919 evalname = True 6920 this = self._parse_bitwise() 6921 else: 6922 evalname = None 6923 self._match_text_seq("NAME") 6924 this = self._parse_id_var() 6925 6926 return self.expression( 6927 exp.XMLElement, 6928 this=this, 6929 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 6930 evalname=evalname, 6931 ) 6932 6933 def _parse_xml_table(self) -> exp.XMLTable: 6934 namespaces = None 6935 passing = None 6936 columns = None 6937 6938 if self._match_text_seq("XMLNAMESPACES", "("): 6939 namespaces = self._parse_xml_namespace() 6940 self._match_text_seq(")", ",") 6941 6942 this = self._parse_string() 6943 6944 if self._match_text_seq("PASSING"): 6945 # The BY VALUE keywords are optional and are provided for semantic clarity 6946 self._match_text_seq("BY", "VALUE") 6947 passing = self._parse_csv(self._parse_column) 6948 6949 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6950 6951 if self._match_text_seq("COLUMNS"): 6952 columns = self._parse_csv(self._parse_field_def) 6953 6954 return self.expression( 6955 exp.XMLTable, 6956 this=this, 6957 namespaces=namespaces, 6958 passing=passing, 6959 columns=columns, 6960 by_ref=by_ref, 6961 ) 6962 6963 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6964 namespaces = [] 6965 6966 while True: 6967 if self._match(TokenType.DEFAULT): 6968 uri = self._parse_string() 6969 else: 6970 uri = self._parse_alias(self._parse_string()) 6971 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6972 if not self._match(TokenType.COMMA): 6973 break 6974 6975 return namespaces 6976 6977 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6978 args = self._parse_csv(self._parse_disjunction) 6979 6980 if len(args) < 3: 6981 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6982 6983 return self.expression(exp.DecodeCase, expressions=args) 6984 6985 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6986 self._match_text_seq("KEY") 6987 key = self._parse_column() 6988 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6989 self._match_text_seq("VALUE") 6990 value = self._parse_bitwise() 6991 6992 if not key and not value: 6993 return None 6994 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6995 6996 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6997 if not this or not self._match_text_seq("FORMAT", "JSON"): 6998 return this 6999 
7000 return self.expression(exp.FormatJson, this=this) 7001 7002 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 7003 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 7004 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 7005 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7006 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7007 else: 7008 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 7009 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 7010 7011 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 7012 7013 if not empty and not error and not null: 7014 return None 7015 7016 return self.expression( 7017 exp.OnCondition, 7018 empty=empty, 7019 error=error, 7020 null=null, 7021 ) 7022 7023 def _parse_on_handling( 7024 self, on: str, *values: str 7025 ) -> t.Optional[str] | t.Optional[exp.Expression]: 7026 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 7027 for value in values: 7028 if self._match_text_seq(value, "ON", on): 7029 return f"{value} ON {on}" 7030 7031 index = self._index 7032 if self._match(TokenType.DEFAULT): 7033 default_value = self._parse_bitwise() 7034 if self._match_text_seq("ON", on): 7035 return default_value 7036 7037 self._retreat(index) 7038 7039 return None 7040 7041 @t.overload 7042 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 7043 7044 @t.overload 7045 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 7046 7047 def _parse_json_object(self, agg=False): 7048 star = self._parse_star() 7049 expressions = ( 7050 [star] 7051 if star 7052 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7053 ) 7054 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7055 7056 unique_keys = None 7057 if self._match_text_seq("WITH", "UNIQUE"): 7058 unique_keys = True 7059 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7060 unique_keys = False 7061 7062 self._match_text_seq("KEYS") 7063 7064 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7065 self._parse_type() 7066 ) 7067 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7068 7069 return self.expression( 7070 exp.JSONObjectAgg if agg else exp.JSONObject, 7071 expressions=expressions, 7072 null_handling=null_handling, 7073 unique_keys=unique_keys, 7074 return_type=return_type, 7075 encoding=encoding, 7076 ) 7077 7078 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7079 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7080 if not self._match_text_seq("NESTED"): 7081 this = self._parse_id_var() 7082 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7083 kind = self._parse_types(allow_identifiers=False) 7084 nested = None 7085 else: 7086 this = None 7087 ordinality = None 7088 kind = None 7089 nested = True 7090 7091 path = self._match_text_seq("PATH") and self._parse_string() 7092 nested_schema = nested and self._parse_json_schema() 7093 7094 return self.expression( 7095 exp.JSONColumnDef, 7096 this=this, 7097 kind=kind, 7098 path=path, 7099 nested_schema=nested_schema, 7100 ordinality=ordinality, 7101 ) 7102 7103 def _parse_json_schema(self) -> exp.JSONSchema: 7104 self._match_text_seq("COLUMNS") 7105 return self.expression( 7106 exp.JSONSchema, 7107 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 7108
) 7109 7110 def _parse_json_table(self) -> exp.JSONTable: 7111 this = self._parse_format_json(self._parse_bitwise()) 7112 path = self._match(TokenType.COMMA) and self._parse_string() 7113 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7114 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7115 schema = self._parse_json_schema() 7116 7117 return exp.JSONTable( 7118 this=this, 7119 schema=schema, 7120 path=path, 7121 error_handling=error_handling, 7122 empty_handling=empty_handling, 7123 ) 7124 7125 def _parse_match_against(self) -> exp.MatchAgainst: 7126 if self._match_text_seq("TABLE"): 7127 # parse SingleStore MATCH(TABLE ...) syntax 7128 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7129 expressions = [] 7130 table = self._parse_table() 7131 if table: 7132 expressions = [table] 7133 else: 7134 expressions = self._parse_csv(self._parse_column) 7135 7136 self._match_text_seq(")", "AGAINST", "(") 7137 7138 this = self._parse_string() 7139 7140 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7141 modifier = "IN NATURAL LANGUAGE MODE" 7142 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7143 modifier = f"{modifier} WITH QUERY EXPANSION" 7144 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7145 modifier = "IN BOOLEAN MODE" 7146 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7147 modifier = "WITH QUERY EXPANSION" 7148 else: 7149 modifier = None 7150 7151 return self.expression( 7152 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 7153 ) 7154 7155 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7156 def _parse_open_json(self) -> exp.OpenJSON: 7157 this = self._parse_bitwise() 7158 path = self._match(TokenType.COMMA) and self._parse_string() 7159 7160 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7161 this = self._parse_field(any_token=True) 7162 kind = self._parse_types() 7163 path = self._parse_string() 7164 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7165 7166 return self.expression( 7167 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7168 ) 7169 7170 expressions = None 7171 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7172 self._match_l_paren() 7173 expressions = self._parse_csv(_parse_open_json_column_def) 7174 7175 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7176 7177 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7178 args = self._parse_csv(self._parse_bitwise) 7179 7180 if self._match(TokenType.IN): 7181 return self.expression( 7182 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7183 ) 7184 7185 if haystack_first: 7186 haystack = seq_get(args, 0) 7187 needle = seq_get(args, 1) 7188 else: 7189 haystack = seq_get(args, 1) 7190 needle = seq_get(args, 0) 7191 7192 return self.expression( 7193 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7194 ) 7195 7196 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7197 args = self._parse_csv(self._parse_table) 7198 return exp.JoinHint(this=func_name.upper(), expressions=args) 7199 7200 def _parse_substring(self) -> exp.Substring: 7201 # Postgres supports the form: substring(string [from int] [for int]) 7202 # (despite being undocumented, the reverse order also works) 7203 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7204 7205 args = 
t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7206 7207 start, length = None, None 7208 7209 while self._curr: 7210 if self._match(TokenType.FROM): 7211 start = self._parse_bitwise() 7212 elif self._match(TokenType.FOR): 7213 if not start: 7214 start = exp.Literal.number(1) 7215 length = self._parse_bitwise() 7216 else: 7217 break 7218 7219 if start: 7220 args.append(start) 7221 if length: 7222 args.append(length) 7223 7224 return self.validate_expression(exp.Substring.from_arg_list(args), args) 7225 7226 def _parse_trim(self) -> exp.Trim: 7227 # https://www.w3resource.com/sql/character-functions/trim.php 7228 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7229 7230 position = None 7231 collation = None 7232 expression = None 7233 7234 if self._match_texts(self.TRIM_TYPES): 7235 position = self._prev.text.upper() 7236 7237 this = self._parse_bitwise() 7238 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7239 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7240 expression = self._parse_bitwise() 7241 7242 if invert_order: 7243 this, expression = expression, this 7244 7245 if self._match(TokenType.COLLATE): 7246 collation = self._parse_bitwise() 7247 7248 return self.expression( 7249 exp.Trim, this=this, position=position, expression=expression, collation=collation 7250 ) 7251 7252 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7253 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 7254 7255 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7256 return self._parse_window(self._parse_id_var(), alias=True) 7257 7258 def _parse_respect_or_ignore_nulls( 7259 self, this: t.Optional[exp.Expression] 7260 ) -> t.Optional[exp.Expression]: 7261 if self._match_text_seq("IGNORE", "NULLS"): 7262 return self.expression(exp.IgnoreNulls, this=this) 7263 if self._match_text_seq("RESPECT", "NULLS"): 7264 return self.expression(exp.RespectNulls, this=this) 7265 return this 7266 7267 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7268 if self._match(TokenType.HAVING): 7269 self._match_texts(("MAX", "MIN")) 7270 max = self._prev.text.upper() != "MIN" 7271 return self.expression( 7272 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7273 ) 7274 7275 return this 7276 7277 def _parse_window( 7278 self, this: t.Optional[exp.Expression], alias: bool = False 7279 ) -> t.Optional[exp.Expression]: 7280 func = this 7281 comments = func.comments if isinstance(func, exp.Expression) else None 7282 7283 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7284 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7285 if self._match_text_seq("WITHIN", "GROUP"): 7286 order = self._parse_wrapped(self._parse_order) 7287 this = self.expression(exp.WithinGroup, this=this, expression=order) 7288 7289 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7290 self._match(TokenType.WHERE) 7291 this = self.expression( 7292 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7293 ) 7294 self._match_r_paren() 7295 7296 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7297 # Some dialects choose to implement and some do not. 
7298 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7299 7300 # There is some code above in _parse_lambda that handles 7301 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7302 7303 # The below changes handle 7304 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7305 7306 # Oracle allows both formats 7307 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7308 # and Snowflake chose to do the same for familiarity 7309 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7310 if isinstance(this, exp.AggFunc): 7311 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7312 7313 if ignore_respect and ignore_respect is not this: 7314 ignore_respect.replace(ignore_respect.this) 7315 this = self.expression(ignore_respect.__class__, this=this) 7316 7317 this = self._parse_respect_or_ignore_nulls(this) 7318 7319 # bigquery select from window x AS (partition by ...) 7320 if alias: 7321 over = None 7322 self._match(TokenType.ALIAS) 7323 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7324 return this 7325 else: 7326 over = self._prev.text.upper() 7327 7328 if comments and isinstance(func, exp.Expression): 7329 func.pop_comments() 7330 7331 if not self._match(TokenType.L_PAREN): 7332 return self.expression( 7333 exp.Window, 7334 comments=comments, 7335 this=this, 7336 alias=self._parse_id_var(False), 7337 over=over, 7338 ) 7339 7340 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7341 7342 first = self._match(TokenType.FIRST) 7343 if self._match_text_seq("LAST"): 7344 first = False 7345 7346 partition, order = self._parse_partition_and_order() 7347 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7348 7349 if kind: 7350 self._match(TokenType.BETWEEN) 7351 start = self._parse_window_spec() 7352 7353 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7354 exclude = ( 7355 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7356 if self._match_text_seq("EXCLUDE") 7357 else None 7358 ) 7359 7360 spec = self.expression( 7361 exp.WindowSpec, 7362 kind=kind, 7363 start=start["value"], 7364 start_side=start["side"], 7365 end=end.get("value"), 7366 end_side=end.get("side"), 7367 exclude=exclude, 7368 ) 7369 else: 7370 spec = None 7371 7372 self._match_r_paren() 7373 7374 window = self.expression( 7375 exp.Window, 7376 comments=comments, 7377 this=this, 7378 partition_by=partition, 7379 order=order, 7380 spec=spec, 7381 alias=window_alias, 7382 over=over, 7383 first=first, 7384 ) 7385 7386 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
7387 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7388 return self._parse_window(window, alias=alias) 7389 7390 return window 7391 7392 def _parse_partition_and_order( 7393 self, 7394 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7395 return self._parse_partition_by(), self._parse_order() 7396 7397 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7398 self._match(TokenType.BETWEEN) 7399 7400 return { 7401 "value": ( 7402 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7403 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7404 or self._parse_bitwise() 7405 ), 7406 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7407 } 7408 7409 def _parse_alias( 7410 self, this: t.Optional[exp.Expression], explicit: bool = False 7411 ) -> t.Optional[exp.Expression]: 7412 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7413 # so this section tries to parse the clause version and if it fails, it treats the token 7414 # as an identifier (alias) 7415 if self._can_parse_limit_or_offset(): 7416 return this 7417 7418 any_token = self._match(TokenType.ALIAS) 7419 comments = self._prev_comments or [] 7420 7421 if explicit and not any_token: 7422 return this 7423 7424 if self._match(TokenType.L_PAREN): 7425 aliases = self.expression( 7426 exp.Aliases, 7427 comments=comments, 7428 this=this, 7429 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7430 ) 7431 self._match_r_paren(aliases) 7432 return aliases 7433 7434 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7435 self.STRING_ALIASES and self._parse_string_as_identifier() 7436 ) 7437 7438 if alias: 7439 comments.extend(alias.pop_comments()) 7440 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7441 column = this.this 7442 7443 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7444 if not this.comments and column and column.comments: 7445 this.comments = column.pop_comments() 7446 7447 return this 7448 7449 def _parse_id_var( 7450 self, 7451 any_token: bool = True, 7452 tokens: t.Optional[t.Collection[TokenType]] = None, 7453 ) -> t.Optional[exp.Expression]: 7454 expression = self._parse_identifier() 7455 if not expression and ( 7456 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7457 ): 7458 quoted = self._prev.token_type == TokenType.STRING 7459 expression = self._identifier_expression(quoted=quoted) 7460 7461 return expression 7462 7463 def _parse_string(self) -> t.Optional[exp.Expression]: 7464 if self._match_set(self.STRING_PARSERS): 7465 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7466 return self._parse_placeholder() 7467 7468 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7469 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7470 if output: 7471 output.update_positions(self._prev) 7472 return output 7473 7474 def _parse_number(self) -> t.Optional[exp.Expression]: 7475 if self._match_set(self.NUMERIC_PARSERS): 7476 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7477 return self._parse_placeholder() 7478 7479 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7480 if self._match(TokenType.IDENTIFIER): 7481 return self._identifier_expression(quoted=True) 7482 return self._parse_placeholder() 7483 7484 def _parse_var( 7485 self, 7486 any_token: bool = False, 7487 tokens: 
t.Optional[t.Collection[TokenType]] = None, 7488 upper: bool = False, 7489 ) -> t.Optional[exp.Expression]: 7490 if ( 7491 (any_token and self._advance_any()) 7492 or self._match(TokenType.VAR) 7493 or (self._match_set(tokens) if tokens else False) 7494 ): 7495 return self.expression( 7496 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7497 ) 7498 return self._parse_placeholder() 7499 7500 def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]: 7501 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7502 self._advance() 7503 return self._prev 7504 return None 7505 7506 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7507 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7508 7509 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7510 return self._parse_primary() or self._parse_var(any_token=True) 7511 7512 def _parse_null(self) -> t.Optional[exp.Expression]: 7513 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 7514 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7515 return self._parse_placeholder() 7516 7517 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7518 if self._match(TokenType.TRUE): 7519 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7520 if self._match(TokenType.FALSE): 7521 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7522 return self._parse_placeholder() 7523 7524 def _parse_star(self) -> t.Optional[exp.Expression]: 7525 if self._match(TokenType.STAR): 7526 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7527 return self._parse_placeholder() 7528 7529 def _parse_parameter(self) -> exp.Parameter: 7530 this = self._parse_identifier() or self._parse_primary_or_var() 7531 return self.expression(exp.Parameter, this=this) 7532 7533 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7534 if self._match_set(self.PLACEHOLDER_PARSERS): 7535 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7536 if placeholder: 7537 return placeholder 7538 self._advance(-1) 7539 return None 7540 7541 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7542 if not self._match_texts(keywords): 7543 return None 7544 if self._match(TokenType.L_PAREN, advance=False): 7545 return self._parse_wrapped_csv(self._parse_expression) 7546 7547 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 7548 return [expression] if expression else None 7549 7550 def _parse_csv( 7551 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7552 ) -> t.List[exp.Expression]: 7553 parse_result = parse_method() 7554 items = [parse_result] if parse_result is not None else [] 7555 7556 while self._match(sep): 7557 self._add_comments(parse_result) 7558 parse_result = parse_method() 7559 if parse_result is not None: 7560 items.append(parse_result) 7561 7562 return items 7563 7564 def _parse_tokens( 7565 self, parse_method: t.Callable, expressions: t.Dict 7566 ) -> t.Optional[exp.Expression]: 7567 this = parse_method() 7568 7569 while self._match_set(expressions): 7570 this = self.expression( 7571 expressions[self._prev.token_type], 7572 this=this, 7573 comments=self._prev_comments, 7574 expression=parse_method(), 7575 ) 7576 7577 return this 7578 7579 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7580 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7581 7582 def 
_parse_wrapped_csv( 7583 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7584 ) -> t.List[exp.Expression]: 7585 return self._parse_wrapped( 7586 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7587 ) 7588 7589 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7590 wrapped = self._match(TokenType.L_PAREN) 7591 if not wrapped and not optional: 7592 self.raise_error("Expecting (") 7593 parse_result = parse_method() 7594 if wrapped: 7595 self._match_r_paren() 7596 return parse_result 7597 7598 def _parse_expressions(self) -> t.List[exp.Expression]: 7599 return self._parse_csv(self._parse_expression) 7600 7601 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7602 return ( 7603 self._parse_set_operations( 7604 self._parse_alias(self._parse_assignment(), explicit=True) 7605 if alias 7606 else self._parse_assignment() 7607 ) 7608 or self._parse_select() 7609 ) 7610 7611 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7612 return self._parse_query_modifiers( 7613 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7614 ) 7615 7616 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7617 this = None 7618 if self._match_texts(self.TRANSACTION_KIND): 7619 this = self._prev.text 7620 7621 self._match_texts(("TRANSACTION", "WORK")) 7622 7623 modes = [] 7624 while True: 7625 mode = [] 7626 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7627 mode.append(self._prev.text) 7628 7629 if mode: 7630 modes.append(" ".join(mode)) 7631 if not self._match(TokenType.COMMA): 7632 break 7633 7634 return self.expression(exp.Transaction, this=this, modes=modes) 7635 7636 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7637 chain = None 7638 savepoint = None 7639 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7640 7641 self._match_texts(("TRANSACTION", "WORK")) 7642 7643 if self._match_text_seq("TO"): 7644 self._match_text_seq("SAVEPOINT") 7645 savepoint = self._parse_id_var() 7646 7647 if self._match(TokenType.AND): 7648 chain = not self._match_text_seq("NO") 7649 self._match_text_seq("CHAIN") 7650 7651 if is_rollback: 7652 return self.expression(exp.Rollback, savepoint=savepoint) 7653 7654 return self.expression(exp.Commit, chain=chain) 7655 7656 def _parse_refresh(self) -> exp.Refresh | exp.Command: 7657 if self._match(TokenType.TABLE): 7658 kind = "TABLE" 7659 elif self._match_text_seq("MATERIALIZED", "VIEW"): 7660 kind = "MATERIALIZED VIEW" 7661 else: 7662 kind = "" 7663 7664 this = self._parse_string() or self._parse_table() 7665 if not kind and not isinstance(this, exp.Literal): 7666 return self._parse_as_command(self._prev) 7667 7668 return self.expression(exp.Refresh, this=this, kind=kind) 7669 7670 def _parse_column_def_with_exists(self): 7671 start = self._index 7672 self._match(TokenType.COLUMN) 7673 7674 exists_column = self._parse_exists(not_=True) 7675 expression = self._parse_field_def() 7676 7677 if not isinstance(expression, exp.ColumnDef): 7678 self._retreat(start) 7679 return None 7680 7681 expression.set("exists", exists_column) 7682 7683 return expression 7684 7685 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7686 if not self._prev.text.upper() == "ADD": 7687 return None 7688 7689 expression = self._parse_column_def_with_exists() 7690 if not expression: 7691 return None 7692 7693 # 
https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7694 if self._match_texts(("FIRST", "AFTER")): 7695 position = self._prev.text 7696 column_position = self.expression( 7697 exp.ColumnPosition, this=self._parse_column(), position=position 7698 ) 7699 expression.set("position", column_position) 7700 7701 return expression 7702 7703 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7704 drop = self._match(TokenType.DROP) and self._parse_drop() 7705 if drop and not isinstance(drop, exp.Command): 7706 drop.set("kind", drop.args.get("kind", "COLUMN")) 7707 return drop 7708 7709 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7710 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7711 return self.expression( 7712 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7713 ) 7714 7715 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7716 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7717 self._match_text_seq("ADD") 7718 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7719 return self.expression( 7720 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7721 ) 7722 7723 column_def = self._parse_add_column() 7724 if isinstance(column_def, exp.ColumnDef): 7725 return column_def 7726 7727 exists = self._parse_exists(not_=True) 7728 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7729 return self.expression( 7730 exp.AddPartition, 7731 exists=exists, 7732 this=self._parse_field(any_token=True), 7733 location=self._match_text_seq("LOCATION", advance=False) 7734 and self._parse_property(), 7735 ) 7736 7737 return None 7738 7739 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 7740 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 7741 or self._match_text_seq("COLUMNS") 7742 ): 7743 schema = self._parse_schema() 7744 7745 return ( 7746 ensure_list(schema) 7747 if schema 7748 else self._parse_csv(self._parse_column_def_with_exists) 7749 ) 7750 7751 return self._parse_csv(_parse_add_alteration) 7752 7753 def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]: 7754 if self._match_texts(self.ALTER_ALTER_PARSERS): 7755 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 7756 7757 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 7758 # keyword after ALTER we default to parsing this statement 7759 self._match(TokenType.COLUMN) 7760 column = self._parse_field(any_token=True) 7761 7762 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 7763 return self.expression(exp.AlterColumn, this=column, drop=True) 7764 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 7765 return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction()) 7766 if self._match(TokenType.COMMENT): 7767 return self.expression(exp.AlterColumn, this=column, comment=self._parse_string()) 7768 if self._match_text_seq("DROP", "NOT", "NULL"): 7769 return self.expression( 7770 exp.AlterColumn, 7771 this=column, 7772 drop=True, 7773 allow_null=True, 7774 ) 7775 if self._match_text_seq("SET", "NOT", "NULL"): 7776 return self.expression( 7777 exp.AlterColumn, 7778 this=column, 7779 allow_null=False, 7780 ) 7781 7782 if self._match_text_seq("SET", "VISIBLE"): 7783 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 7784 if self._match_text_seq("SET", "INVISIBLE"): 7785 return 
self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 7786 7787 self._match_text_seq("SET", "DATA") 7788 self._match_text_seq("TYPE") 7789 return self.expression( 7790 exp.AlterColumn, 7791 this=column, 7792 dtype=self._parse_types(), 7793 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7794 using=self._match(TokenType.USING) and self._parse_disjunction(), 7795 ) 7796 7797 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7798 if self._match_texts(("ALL", "EVEN", "AUTO")): 7799 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7800 7801 self._match_text_seq("KEY", "DISTKEY") 7802 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7803 7804 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7805 if compound: 7806 self._match_text_seq("SORTKEY") 7807 7808 if self._match(TokenType.L_PAREN, advance=False): 7809 return self.expression( 7810 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7811 ) 7812 7813 self._match_texts(("AUTO", "NONE")) 7814 return self.expression( 7815 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7816 ) 7817 7818 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7819 index = self._index - 1 7820 7821 partition_exists = self._parse_exists() 7822 if self._match(TokenType.PARTITION, advance=False): 7823 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7824 7825 self._retreat(index) 7826 return self._parse_csv(self._parse_drop_column) 7827 7828 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7829 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7830 exists = self._parse_exists() 7831 old_column = self._parse_column() 7832 to = self._match_text_seq("TO") 7833 new_column = self._parse_column() 7834 7835 if old_column is None or to is None or new_column is None: 7836 return None 7837 7838 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7839 7840 self._match_text_seq("TO") 7841 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7842 7843 def _parse_alter_table_set(self) -> exp.AlterSet: 7844 alter_set = self.expression(exp.AlterSet) 7845 7846 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7847 "TABLE", "PROPERTIES" 7848 ): 7849 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7850 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7851 alter_set.set("expressions", [self._parse_assignment()]) 7852 elif self._match_texts(("LOGGED", "UNLOGGED")): 7853 alter_set.set("option", exp.var(self._prev.text.upper())) 7854 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7855 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7856 elif self._match_text_seq("LOCATION"): 7857 alter_set.set("location", self._parse_field()) 7858 elif self._match_text_seq("ACCESS", "METHOD"): 7859 alter_set.set("access_method", self._parse_field()) 7860 elif self._match_text_seq("TABLESPACE"): 7861 alter_set.set("tablespace", self._parse_field()) 7862 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7863 alter_set.set("file_format", [self._parse_field()]) 7864 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7865 alter_set.set("file_format", self._parse_wrapped_options()) 7866 elif 
self._match_text_seq("STAGE_COPY_OPTIONS"): 7867 alter_set.set("copy_options", self._parse_wrapped_options()) 7868 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7869 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7870 else: 7871 if self._match_text_seq("SERDE"): 7872 alter_set.set("serde", self._parse_field()) 7873 7874 properties = self._parse_wrapped(self._parse_properties, optional=True) 7875 alter_set.set("expressions", [properties]) 7876 7877 return alter_set 7878 7879 def _parse_alter_session(self) -> exp.AlterSession: 7880 """Parse ALTER SESSION SET/UNSET statements.""" 7881 if self._match(TokenType.SET): 7882 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 7883 return self.expression(exp.AlterSession, expressions=expressions, unset=False) 7884 7885 self._match_text_seq("UNSET") 7886 expressions = self._parse_csv( 7887 lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True)) 7888 ) 7889 return self.expression(exp.AlterSession, expressions=expressions, unset=True) 7890 7891 def _parse_alter(self) -> exp.Alter | exp.Command: 7892 start = self._prev 7893 7894 alter_token = self._match_set(self.ALTERABLES) and self._prev 7895 if not alter_token: 7896 return self._parse_as_command(start) 7897 7898 exists = self._parse_exists() 7899 only = self._match_text_seq("ONLY") 7900 7901 if alter_token.token_type == TokenType.SESSION: 7902 this = None 7903 check = None 7904 cluster = None 7905 else: 7906 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 7907 check = self._match_text_seq("WITH", "CHECK") 7908 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 7909 7910 if self._next: 7911 self._advance() 7912 7913 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 7914 if parser: 7915 actions = ensure_list(parser(self)) 7916 not_valid = self._match_text_seq("NOT", "VALID") 7917 options = self._parse_csv(self._parse_property) 7918 cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 7919 7920 if not self._curr and actions: 7921 return self.expression( 7922 exp.Alter, 7923 this=this, 7924 kind=alter_token.text.upper(), 7925 exists=exists, 7926 actions=actions, 7927 only=only, 7928 options=options, 7929 cluster=cluster, 7930 not_valid=not_valid, 7931 check=check, 7932 cascade=cascade, 7933 ) 7934 7935 return self._parse_as_command(start) 7936 7937 def _parse_analyze(self) -> exp.Analyze | exp.Command: 7938 start = self._prev 7939 # https://duckdb.org/docs/sql/statements/analyze 7940 if not self._curr: 7941 return self.expression(exp.Analyze) 7942 7943 options = [] 7944 while self._match_texts(self.ANALYZE_STYLES): 7945 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 7946 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7947 else: 7948 options.append(self._prev.text.upper()) 7949 7950 this: t.Optional[exp.Expression] = None 7951 inner_expression: t.Optional[exp.Expression] = None 7952 7953 kind = self._curr and self._curr.text.upper() 7954 7955 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7956 this = self._parse_table_parts() 7957 elif self._match_text_seq("TABLES"): 7958 if self._match_set((TokenType.FROM, TokenType.IN)): 7959 kind = f"{kind} {self._prev.text.upper()}" 7960 this = self._parse_table(schema=True, is_db_reference=True) 7961 elif self._match_text_seq("DATABASE"): 7962 this = self._parse_table(schema=True, is_db_reference=True) 7963 elif 
self._match_text_seq("CLUSTER"): 7964 this = self._parse_table() 7965 # Try matching inner expr keywords before fallback to parse table. 7966 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7967 kind = None 7968 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7969 else: 7970 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7971 kind = None 7972 this = self._parse_table_parts() 7973 7974 partition = self._try_parse(self._parse_partition) 7975 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7976 return self._parse_as_command(start) 7977 7978 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7979 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7980 "WITH", "ASYNC", "MODE" 7981 ): 7982 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7983 else: 7984 mode = None 7985 7986 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7987 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7988 7989 properties = self._parse_properties() 7990 return self.expression( 7991 exp.Analyze, 7992 kind=kind, 7993 this=this, 7994 mode=mode, 7995 partition=partition, 7996 properties=properties, 7997 expression=inner_expression, 7998 options=options, 7999 ) 8000 8001 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 8002 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 8003 this = None 8004 kind = self._prev.text.upper() 8005 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 8006 expressions = [] 8007 8008 if not self._match_text_seq("STATISTICS"): 8009 self.raise_error("Expecting token STATISTICS") 8010 8011 if self._match_text_seq("NOSCAN"): 8012 this = "NOSCAN" 8013 elif self._match(TokenType.FOR): 8014 if self._match_text_seq("ALL", "COLUMNS"): 8015 this = "FOR ALL COLUMNS" 8016 if self._match_texts("COLUMNS"): 8017 this = "FOR COLUMNS" 8018 expressions = self._parse_csv(self._parse_column_reference) 8019 elif self._match_text_seq("SAMPLE"): 8020 sample = self._parse_number() 8021 expressions = [ 8022 self.expression( 8023 exp.AnalyzeSample, 8024 sample=sample, 8025 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 8026 ) 8027 ] 8028 8029 return self.expression( 8030 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 8031 ) 8032 8033 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 8034 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 8035 kind = None 8036 this = None 8037 expression: t.Optional[exp.Expression] = None 8038 if self._match_text_seq("REF", "UPDATE"): 8039 kind = "REF" 8040 this = "UPDATE" 8041 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 8042 this = "UPDATE SET DANGLING TO NULL" 8043 elif self._match_text_seq("STRUCTURE"): 8044 kind = "STRUCTURE" 8045 if self._match_text_seq("CASCADE", "FAST"): 8046 this = "CASCADE FAST" 8047 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 8048 ("ONLINE", "OFFLINE") 8049 ): 8050 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 8051 expression = self._parse_into() 8052 8053 return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 8054 8055 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 8056 this = self._prev.text.upper() 8057 if self._match_text_seq("COLUMNS"): 8058 return self.expression(exp.AnalyzeColumns, this=f"{this} 
{self._prev.text.upper()}") 8059 return None 8060 8061 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 8062 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 8063 if self._match_text_seq("STATISTICS"): 8064 return self.expression(exp.AnalyzeDelete, kind=kind) 8065 return None 8066 8067 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 8068 if self._match_text_seq("CHAINED", "ROWS"): 8069 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 8070 return None 8071 8072 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 8073 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 8074 this = self._prev.text.upper() 8075 expression: t.Optional[exp.Expression] = None 8076 expressions = [] 8077 update_options = None 8078 8079 if self._match_text_seq("HISTOGRAM", "ON"): 8080 expressions = self._parse_csv(self._parse_column_reference) 8081 with_expressions = [] 8082 while self._match(TokenType.WITH): 8083 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8084 if self._match_texts(("SYNC", "ASYNC")): 8085 if self._match_text_seq("MODE", advance=False): 8086 with_expressions.append(f"{self._prev.text.upper()} MODE") 8087 self._advance() 8088 else: 8089 buckets = self._parse_number() 8090 if self._match_text_seq("BUCKETS"): 8091 with_expressions.append(f"{buckets} BUCKETS") 8092 if with_expressions: 8093 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 8094 8095 if self._match_texts(("MANUAL", "AUTO")) and self._match( 8096 TokenType.UPDATE, advance=False 8097 ): 8098 update_options = self._prev.text.upper() 8099 self._advance() 8100 elif self._match_text_seq("USING", "DATA"): 8101 expression = self.expression(exp.UsingData, this=self._parse_string()) 8102 8103 return self.expression( 8104 exp.AnalyzeHistogram, 8105 this=this, 8106 expressions=expressions, 8107 expression=expression, 8108 update_options=update_options, 8109 ) 8110 8111 def _parse_merge(self) -> exp.Merge: 8112 self._match(TokenType.INTO) 8113 target = self._parse_table() 8114 8115 if target and self._match(TokenType.ALIAS, advance=False): 8116 target.set("alias", self._parse_table_alias()) 8117 8118 self._match(TokenType.USING) 8119 using = self._parse_table() 8120 8121 return self.expression( 8122 exp.Merge, 8123 this=target, 8124 using=using, 8125 on=self._match(TokenType.ON) and self._parse_disjunction(), 8126 using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(), 8127 whens=self._parse_when_matched(), 8128 returning=self._parse_returning(), 8129 ) 8130 8131 def _parse_when_matched(self) -> exp.Whens: 8132 whens = [] 8133 8134 while self._match(TokenType.WHEN): 8135 matched = not self._match(TokenType.NOT) 8136 self._match_text_seq("MATCHED") 8137 source = ( 8138 False 8139 if self._match_text_seq("BY", "TARGET") 8140 else self._match_text_seq("BY", "SOURCE") 8141 ) 8142 condition = self._parse_disjunction() if self._match(TokenType.AND) else None 8143 8144 self._match(TokenType.THEN) 8145 8146 if self._match(TokenType.INSERT): 8147 this = self._parse_star() 8148 if this: 8149 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 8150 else: 8151 then = self.expression( 8152 exp.Insert, 8153 this=exp.var("ROW") 8154 if self._match_text_seq("ROW") 8155 else self._parse_value(values=False), 8156 expression=self._match_text_seq("VALUES") and self._parse_value(), 8157 ) 8158 elif self._match(TokenType.UPDATE): 8159 
expressions = self._parse_star() 8160 if expressions: 8161 then = self.expression(exp.Update, expressions=expressions) 8162 else: 8163 then = self.expression( 8164 exp.Update, 8165 expressions=self._match(TokenType.SET) 8166 and self._parse_csv(self._parse_equality), 8167 ) 8168 elif self._match(TokenType.DELETE): 8169 then = self.expression(exp.Var, this=self._prev.text) 8170 else: 8171 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8172 8173 whens.append( 8174 self.expression( 8175 exp.When, 8176 matched=matched, 8177 source=source, 8178 condition=condition, 8179 then=then, 8180 ) 8181 ) 8182 return self.expression(exp.Whens, expressions=whens) 8183 8184 def _parse_show(self) -> t.Optional[exp.Expression]: 8185 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8186 if parser: 8187 return parser(self) 8188 return self._parse_as_command(self._prev) 8189 8190 def _parse_set_item_assignment( 8191 self, kind: t.Optional[str] = None 8192 ) -> t.Optional[exp.Expression]: 8193 index = self._index 8194 8195 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8196 return self._parse_set_transaction(global_=kind == "GLOBAL") 8197 8198 left = self._parse_primary() or self._parse_column() 8199 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8200 8201 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8202 self._retreat(index) 8203 return None 8204 8205 right = self._parse_statement() or self._parse_id_var() 8206 if isinstance(right, (exp.Column, exp.Identifier)): 8207 right = exp.var(right.name) 8208 8209 this = self.expression(exp.EQ, this=left, expression=right) 8210 return self.expression(exp.SetItem, this=this, kind=kind) 8211 8212 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8213 self._match_text_seq("TRANSACTION") 8214 characteristics = self._parse_csv( 8215 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8216 ) 8217 return self.expression( 8218 exp.SetItem, 8219 expressions=characteristics, 8220 kind="TRANSACTION", 8221 global_=global_, 8222 ) 8223 8224 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8225 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8226 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8227 8228 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8229 index = self._index 8230 set_ = self.expression( 8231 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8232 ) 8233 8234 if self._curr: 8235 self._retreat(index) 8236 return self._parse_as_command(self._prev) 8237 8238 return set_ 8239 8240 def _parse_var_from_options( 8241 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8242 ) -> t.Optional[exp.Var]: 8243 start = self._curr 8244 if not start: 8245 return None 8246 8247 option = start.text.upper() 8248 continuations = options.get(option) 8249 8250 index = self._index 8251 self._advance() 8252 for keywords in continuations or []: 8253 if isinstance(keywords, str): 8254 keywords = (keywords,) 8255 8256 if self._match_text_seq(*keywords): 8257 option = f"{option} {' '.join(keywords)}" 8258 break 8259 else: 8260 if continuations or continuations is None: 8261 if raise_unmatched: 8262 self.raise_error(f"Unknown option {option}") 8263 8264 self._retreat(index) 8265 return None 8266 8267 return exp.var(option) 8268 8269 def _parse_as_command(self, start: Token) -> exp.Command: 8270 while self._curr: 8271 
self._advance() 8272 text = self._find_sql(start, self._prev) 8273 size = len(start.text) 8274 self._warn_unsupported() 8275 return exp.Command(this=text[:size], expression=text[size:]) 8276 8277 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8278 settings = [] 8279 8280 self._match_l_paren() 8281 kind = self._parse_id_var() 8282 8283 if self._match(TokenType.L_PAREN): 8284 while True: 8285 key = self._parse_id_var() 8286 value = self._parse_primary() 8287 if not key and value is None: 8288 break 8289 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8290 self._match(TokenType.R_PAREN) 8291 8292 self._match_r_paren() 8293 8294 return self.expression( 8295 exp.DictProperty, 8296 this=this, 8297 kind=kind.this if kind else None, 8298 settings=settings, 8299 ) 8300 8301 def _parse_dict_range(self, this: str) -> exp.DictRange: 8302 self._match_l_paren() 8303 has_min = self._match_text_seq("MIN") 8304 if has_min: 8305 min = self._parse_var() or self._parse_primary() 8306 self._match_text_seq("MAX") 8307 max = self._parse_var() or self._parse_primary() 8308 else: 8309 max = self._parse_var() or self._parse_primary() 8310 min = exp.Literal.number(0) 8311 self._match_r_paren() 8312 return self.expression(exp.DictRange, this=this, min=min, max=max) 8313 8314 def _parse_comprehension( 8315 self, this: t.Optional[exp.Expression] 8316 ) -> t.Optional[exp.Comprehension]: 8317 index = self._index 8318 expression = self._parse_column() 8319 position = self._match(TokenType.COMMA) and self._parse_column() 8320 8321 if not self._match(TokenType.IN): 8322 self._retreat(index - 1) 8323 return None 8324 iterator = self._parse_column() 8325 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 8326 return self.expression( 8327 exp.Comprehension, 8328 this=this, 8329 expression=expression, 8330 position=position, 8331 iterator=iterator, 8332 condition=condition, 8333 ) 8334 8335 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8336 if self._match(TokenType.HEREDOC_STRING): 8337 return self.expression(exp.Heredoc, this=self._prev.text) 8338 8339 if not self._match_text_seq("$"): 8340 return None 8341 8342 tags = ["$"] 8343 tag_text = None 8344 8345 if self._is_connected(): 8346 self._advance() 8347 tags.append(self._prev.text.upper()) 8348 else: 8349 self.raise_error("No closing $ found") 8350 8351 if tags[-1] != "$": 8352 if self._is_connected() and self._match_text_seq("$"): 8353 tag_text = tags[-1] 8354 tags.append("$") 8355 else: 8356 self.raise_error("No closing $ found") 8357 8358 heredoc_start = self._curr 8359 8360 while self._curr: 8361 if self._match_text_seq(*tags, advance=False): 8362 this = self._find_sql(heredoc_start, self._prev) 8363 self._advance(len(tags)) 8364 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8365 8366 self._advance() 8367 8368 self.raise_error(f"No closing {''.join(tags)} found") 8369 return None 8370 8371 def _find_parser( 8372 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8373 ) -> t.Optional[t.Callable]: 8374 if not self._curr: 8375 return None 8376 8377 index = self._index 8378 this = [] 8379 while True: 8380 # The current token might be multiple words 8381 curr = self._curr.text.upper() 8382 key = curr.split(" ") 8383 this.append(curr) 8384 8385 self._advance() 8386 result, trie = in_trie(trie, key) 8387 if result == TrieResult.FAILED: 8388 break 8389 8390 if result == TrieResult.EXISTS: 8391 subparser = parsers[" ".join(this)] 8392 return subparser 8393 8394 self._retreat(index) 
8395 return None 8396 8397 def _match(self, token_type, advance=True, expression=None): 8398 if not self._curr: 8399 return None 8400 8401 if self._curr.token_type == token_type: 8402 if advance: 8403 self._advance() 8404 self._add_comments(expression) 8405 return True 8406 8407 return None 8408 8409 def _match_set(self, types, advance=True): 8410 if not self._curr: 8411 return None 8412 8413 if self._curr.token_type in types: 8414 if advance: 8415 self._advance() 8416 return True 8417 8418 return None 8419 8420 def _match_pair(self, token_type_a, token_type_b, advance=True): 8421 if not self._curr or not self._next: 8422 return None 8423 8424 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8425 if advance: 8426 self._advance(2) 8427 return True 8428 8429 return None 8430 8431 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8432 if not self._match(TokenType.L_PAREN, expression=expression): 8433 self.raise_error("Expecting (") 8434 8435 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8436 if not self._match(TokenType.R_PAREN, expression=expression): 8437 self.raise_error("Expecting )") 8438 8439 def _match_texts(self, texts, advance=True): 8440 if ( 8441 self._curr 8442 and self._curr.token_type != TokenType.STRING 8443 and self._curr.text.upper() in texts 8444 ): 8445 if advance: 8446 self._advance() 8447 return True 8448 return None 8449 8450 def _match_text_seq(self, *texts, advance=True): 8451 index = self._index 8452 for text in texts: 8453 if ( 8454 self._curr 8455 and self._curr.token_type != TokenType.STRING 8456 and self._curr.text.upper() == text 8457 ): 8458 self._advance() 8459 else: 8460 self._retreat(index) 8461 return None 8462 8463 if not advance: 8464 self._retreat(index) 8465 8466 return True 8467 8468 def _replace_lambda( 8469 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8470 ) -> t.Optional[exp.Expression]: 8471 if not node: 8472 return node 8473 8474 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8475 8476 for column in node.find_all(exp.Column): 8477 typ = lambda_types.get(column.parts[0].name) 8478 if typ is not None: 8479 dot_or_id = column.to_dot() if column.table else column.this 8480 8481 if typ: 8482 dot_or_id = self.expression( 8483 exp.Cast, 8484 this=dot_or_id, 8485 to=typ, 8486 ) 8487 8488 parent = column.parent 8489 8490 while isinstance(parent, exp.Dot): 8491 if not isinstance(parent.parent, exp.Dot): 8492 parent.replace(dot_or_id) 8493 break 8494 parent = parent.parent 8495 else: 8496 if column is node: 8497 node = dot_or_id 8498 else: 8499 column.replace(dot_or_id) 8500 return node 8501 8502 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8503 start = self._prev 8504 8505 # Not to be confused with TRUNCATE(number, decimals) function call 8506 if self._match(TokenType.L_PAREN): 8507 self._retreat(self._index - 2) 8508 return self._parse_function() 8509 8510 # Clickhouse supports TRUNCATE DATABASE as well 8511 is_database = self._match(TokenType.DATABASE) 8512 8513 self._match(TokenType.TABLE) 8514 8515 exists = self._parse_exists(not_=False) 8516 8517 expressions = self._parse_csv( 8518 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8519 ) 8520 8521 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8522 8523 if self._match_text_seq("RESTART", "IDENTITY"): 8524 identity = "RESTART" 8525 elif 
self._match_text_seq("CONTINUE", "IDENTITY"): 8526 identity = "CONTINUE" 8527 else: 8528 identity = None 8529 8530 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8531 option = self._prev.text 8532 else: 8533 option = None 8534 8535 partition = self._parse_partition() 8536 8537 # Fallback case 8538 if self._curr: 8539 return self._parse_as_command(start) 8540 8541 return self.expression( 8542 exp.TruncateTable, 8543 expressions=expressions, 8544 is_database=is_database, 8545 exists=exists, 8546 cluster=cluster, 8547 identity=identity, 8548 option=option, 8549 partition=partition, 8550 ) 8551 8552 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8553 this = self._parse_ordered(self._parse_opclass) 8554 8555 if not self._match(TokenType.WITH): 8556 return this 8557 8558 op = self._parse_var(any_token=True) 8559 8560 return self.expression(exp.WithOperator, this=this, op=op) 8561 8562 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8563 self._match(TokenType.EQ) 8564 self._match(TokenType.L_PAREN) 8565 8566 opts: t.List[t.Optional[exp.Expression]] = [] 8567 option: exp.Expression | None 8568 while self._curr and not self._match(TokenType.R_PAREN): 8569 if self._match_text_seq("FORMAT_NAME", "="): 8570 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8571 option = self._parse_format_name() 8572 else: 8573 option = self._parse_property() 8574 8575 if option is None: 8576 self.raise_error("Unable to parse option") 8577 break 8578 8579 opts.append(option) 8580 8581 return opts 8582 8583 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8584 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8585 8586 options = [] 8587 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8588 option = self._parse_var(any_token=True) 8589 prev = self._prev.text.upper() 8590 8591 # Different dialects might separate options and values by white space, "=" and "AS" 8592 self._match(TokenType.EQ) 8593 self._match(TokenType.ALIAS) 8594 8595 param = self.expression(exp.CopyParameter, this=option) 8596 8597 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8598 TokenType.L_PAREN, advance=False 8599 ): 8600 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8601 param.set("expressions", self._parse_wrapped_options()) 8602 elif prev == "FILE_FORMAT": 8603 # T-SQL's external file format case 8604 param.set("expression", self._parse_field()) 8605 elif ( 8606 prev == "FORMAT" 8607 and self._prev.token_type == TokenType.ALIAS 8608 and self._match_texts(("AVRO", "JSON")) 8609 ): 8610 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8611 param.set("expression", self._parse_field()) 8612 else: 8613 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8614 8615 options.append(param) 8616 self._match(sep) 8617 8618 return options 8619 8620 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8621 expr = self.expression(exp.Credentials) 8622 8623 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8624 expr.set("storage", self._parse_field()) 8625 if self._match_text_seq("CREDENTIALS"): 8626 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8627 creds = ( 8628 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8629 ) 8630 expr.set("credentials", creds) 8631 if self._match_text_seq("ENCRYPTION"): 8632 expr.set("encryption", self._parse_wrapped_options()) 8633 if 
self._match_text_seq("IAM_ROLE"): 8634 expr.set( 8635 "iam_role", 8636 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 8637 ) 8638 if self._match_text_seq("REGION"): 8639 expr.set("region", self._parse_field()) 8640 8641 return expr 8642 8643 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8644 return self._parse_field() 8645 8646 def _parse_copy(self) -> exp.Copy | exp.Command: 8647 start = self._prev 8648 8649 self._match(TokenType.INTO) 8650 8651 this = ( 8652 self._parse_select(nested=True, parse_subquery_alias=False) 8653 if self._match(TokenType.L_PAREN, advance=False) 8654 else self._parse_table(schema=True) 8655 ) 8656 8657 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8658 8659 files = self._parse_csv(self._parse_file_location) 8660 if self._match(TokenType.EQ, advance=False): 8661 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8662 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8663 # list via `_parse_wrapped(..)` below. 8664 self._advance(-1) 8665 files = [] 8666 8667 credentials = self._parse_credentials() 8668 8669 self._match_text_seq("WITH") 8670 8671 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8672 8673 # Fallback case 8674 if self._curr: 8675 return self._parse_as_command(start) 8676 8677 return self.expression( 8678 exp.Copy, 8679 this=this, 8680 kind=kind, 8681 credentials=credentials, 8682 files=files, 8683 params=params, 8684 ) 8685 8686 def _parse_normalize(self) -> exp.Normalize: 8687 return self.expression( 8688 exp.Normalize, 8689 this=self._parse_bitwise(), 8690 form=self._match(TokenType.COMMA) and self._parse_var(), 8691 ) 8692 8693 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8694 args = self._parse_csv(lambda: self._parse_lambda()) 8695 8696 this = seq_get(args, 0) 8697 decimals = seq_get(args, 1) 8698 8699 return expr_type( 8700 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8701 ) 8702 8703 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8704 star_token = self._prev 8705 8706 if self._match_text_seq("COLUMNS", "(", advance=False): 8707 this = self._parse_function() 8708 if isinstance(this, exp.Columns): 8709 this.set("unpack", True) 8710 return this 8711 8712 return self.expression( 8713 exp.Star, 8714 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 8715 replace=self._parse_star_op("REPLACE"), 8716 rename=self._parse_star_op("RENAME"), 8717 ).update_positions(star_token) 8718 8719 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8720 privilege_parts = [] 8721 8722 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8723 # (end of privilege list) or L_PAREN (start of column list) are met 8724 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8725 privilege_parts.append(self._curr.text.upper()) 8726 self._advance() 8727 8728 this = exp.var(" ".join(privilege_parts)) 8729 expressions = ( 8730 self._parse_wrapped_csv(self._parse_column) 8731 if self._match(TokenType.L_PAREN, advance=False) 8732 else None 8733 ) 8734 8735 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8736 8737 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8738 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8739 principal = self._parse_id_var() 8740 8741 if not principal: 8742 return None 
    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            except_=self._parse_star_op("EXCEPT", "EXCLUDE"),
            replace=self._parse_star_op("REPLACE"),
            rename=self._parse_star_op("RENAME"),
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )
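    # Hedged sketch of the GRANT/REVOKE flow above: fully recognized statements
    # become typed nodes, while anything with leftover tokens falls back to
    # exp.Command via _parse_as_command.
    #
    #     from sqlglot import parse_one
    #     parse_one("GRANT SELECT, INSERT (id) ON TABLE t TO ROLE analyst")  # exp.Grant
    #     parse_one("REVOKE GRANT OPTION FOR SELECT ON t FROM alice")        # exp.Revoke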
    def _parse_overlay(self) -> exp.Overlay:
        def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]:
            return (
                self._match(TokenType.COMMA) or self._match_text_seq(text)
            ) and self._parse_bitwise()

        return self.expression(
            exp.Overlay,
            this=self._parse_bitwise(),
            expression=_parse_overlay_arg("PLACING"),
            from_=_parse_overlay_arg("FROM"),
            for_=_parse_overlay_arg("FOR"),
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_function_args())

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        return self.expression(exp.Identifier, token=token or self._prev, **kwargs)
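    # MAX_BY/MIN_BY sketch (ARG_MAX-style aliases; the exact name-to-node mapping
    # depends on the sqlglot version): a leading DISTINCT is wrapped into args[0]
    # before the positional split into (this, expression, count).
    #
    #     from sqlglot import parse_one
    #     parse_one("SELECT MAX_BY(name, price) FROM products")
    #     parse_one("SELECT MAX_BY(DISTINCT name, price, 3) FROM products")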
    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with_", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with_")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with_", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from_")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with_")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from_"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query
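    # Pipe-syntax sketch (GoogleSQL-style `|>`; reading it through the BigQuery
    # dialect is an assumption that depends on the sqlglot version): each `|>`
    # stage above is folded into a fresh __tmp CTE by _build_pipe_cte.
    #
    #     from sqlglot import parse_one
    #     parse_one(
    #         "FROM orders |> WHERE amount > 0 |> AGGREGATE SUM(amount) GROUP BY user_id",
    #         read="bigquery",
    #     )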
    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(
                        exp.Concat,
                        expressions=node.expressions,
                        safe=True,
                        coalesce=self.dialect.CONCAT_COALESCE,
                    )
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(
                exp.Concat, expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE
            )

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
                # remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
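    # MySQL-style input for the GROUP_CONCAT handling above (illustrative; the
    # exact tree shape is version-dependent). The trailing exp.Order swallows the
    # aggregated expression, which is why it is popped and re-wrapped here.
    #
    #     from sqlglot import parse_one
    #     parse_one(
    #         "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ', ') FROM t",
    #         read="mysql",
    #     )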
    def _parse_initcap(self) -> exp.Initcap:
        expr = exp.Initcap.from_arg_list(self._parse_function_args())

        # attach dialect's default delimiters
        if expr.args.get("expression") is None:
            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))

        return expr

    def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while True:
            if not self._match(TokenType.L_PAREN):
                break

            op = ""
            while self._curr and not self._match(TokenType.R_PAREN):
                op += self._curr.text
                self._advance()

            this = self.expression(
                exp.Operator,
                comments=self._prev_comments,
                this=this,
                operator=op,
                expression=self._parse_bitwise(),
            )

            if not self._match(TokenType.OPERATOR):
                break

        return this
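    # PostgreSQL's explicit-operator syntax is what this loop consumes, e.g.
    # (illustrative; field names are a sketch):
    #
    #     from sqlglot import parse_one
    #     parse_one("SELECT a OPERATOR(pg_catalog.+) b", read="postgres")
    #     # -> exp.Operator(this=a, operator="pg_catalog.+", expression=b)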
192class Parser(metaclass=_Parser): 193 """ 194 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 195 196 Args: 197 error_level: The desired error level. 198 Default: ErrorLevel.IMMEDIATE 199 error_message_context: The amount of context to capture from a query string when displaying 200 the error message (in number of characters). 201 Default: 100 202 max_errors: Maximum number of error messages to include in a raised ParseError. 203 This is only relevant if error_level is ErrorLevel.RAISE. 204 Default: 3 205 """ 206 207 FUNCTIONS: t.Dict[str, t.Callable] = { 208 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 209 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 210 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 211 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 212 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 213 ), 214 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 215 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 216 ), 217 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 218 "CONCAT": lambda args, dialect: exp.Concat( 219 expressions=args, 220 safe=not dialect.STRICT_STRING_CONCAT, 221 coalesce=dialect.CONCAT_COALESCE, 222 ), 223 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 224 expressions=args, 225 safe=not dialect.STRICT_STRING_CONCAT, 226 coalesce=dialect.CONCAT_COALESCE, 227 ), 228 "CONVERT_TIMEZONE": build_convert_timezone, 229 "DATE_TO_DATE_STR": lambda args: exp.Cast( 230 this=seq_get(args, 0), 231 to=exp.DataType(this=exp.DataType.Type.TEXT), 232 ), 233 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 234 start=seq_get(args, 0), 235 end=seq_get(args, 1), 236 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 237 ), 238 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 239 is_string=dialect.UUID_IS_STRING_TYPE or None 240 ), 241 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 242 "GREATEST": lambda args, dialect: exp.Greatest( 243 this=seq_get(args, 0), 244 expressions=args[1:], 245 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 246 ), 247 "LEAST": lambda args, dialect: exp.Least( 248 this=seq_get(args, 0), 249 expressions=args[1:], 250 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 251 ), 252 "HEX": build_hex, 253 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 254 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 255 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 256 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 257 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 258 ), 259 "LIKE": build_like, 260 "LOG": build_logarithm, 261 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 262 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 263 "LOWER": build_lower, 264 "LPAD": lambda args: build_pad(args), 265 "LEFTPAD": lambda args: build_pad(args), 266 "LTRIM": lambda args: build_trim(args), 267 "MOD": build_mod, 268 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 269 "RPAD": lambda args: build_pad(args, is_left=False), 270 "RTRIM": lambda args: build_trim(args, is_left=False), 271 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 272 if len(args) != 2 273 
else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 274 "STRPOS": exp.StrPosition.from_arg_list, 275 "CHARINDEX": lambda args: build_locate_strposition(args), 276 "INSTR": exp.StrPosition.from_arg_list, 277 "LOCATE": lambda args: build_locate_strposition(args), 278 "TIME_TO_TIME_STR": lambda args: exp.Cast( 279 this=seq_get(args, 0), 280 to=exp.DataType(this=exp.DataType.Type.TEXT), 281 ), 282 "TO_HEX": build_hex, 283 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 284 this=exp.Cast( 285 this=seq_get(args, 0), 286 to=exp.DataType(this=exp.DataType.Type.TEXT), 287 ), 288 start=exp.Literal.number(1), 289 length=exp.Literal.number(10), 290 ), 291 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 292 "UPPER": build_upper, 293 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 294 "VAR_MAP": build_var_map, 295 } 296 297 NO_PAREN_FUNCTIONS = { 298 TokenType.CURRENT_DATE: exp.CurrentDate, 299 TokenType.CURRENT_DATETIME: exp.CurrentDate, 300 TokenType.CURRENT_TIME: exp.CurrentTime, 301 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 302 TokenType.CURRENT_USER: exp.CurrentUser, 303 TokenType.LOCALTIME: exp.Localtime, 304 TokenType.LOCALTIMESTAMP: exp.Localtimestamp, 305 TokenType.CURRENT_ROLE: exp.CurrentRole, 306 } 307 308 STRUCT_TYPE_TOKENS = { 309 TokenType.FILE, 310 TokenType.NESTED, 311 TokenType.OBJECT, 312 TokenType.STRUCT, 313 TokenType.UNION, 314 } 315 316 NESTED_TYPE_TOKENS = { 317 TokenType.ARRAY, 318 TokenType.LIST, 319 TokenType.LOWCARDINALITY, 320 TokenType.MAP, 321 TokenType.NULLABLE, 322 TokenType.RANGE, 323 *STRUCT_TYPE_TOKENS, 324 } 325 326 ENUM_TYPE_TOKENS = { 327 TokenType.DYNAMIC, 328 TokenType.ENUM, 329 TokenType.ENUM8, 330 TokenType.ENUM16, 331 } 332 333 AGGREGATE_TYPE_TOKENS = { 334 TokenType.AGGREGATEFUNCTION, 335 TokenType.SIMPLEAGGREGATEFUNCTION, 336 } 337 338 TYPE_TOKENS = { 339 TokenType.BIT, 340 TokenType.BOOLEAN, 341 TokenType.TINYINT, 342 TokenType.UTINYINT, 343 TokenType.SMALLINT, 344 TokenType.USMALLINT, 345 TokenType.INT, 346 TokenType.UINT, 347 TokenType.BIGINT, 348 TokenType.UBIGINT, 349 TokenType.BIGNUM, 350 TokenType.INT128, 351 TokenType.UINT128, 352 TokenType.INT256, 353 TokenType.UINT256, 354 TokenType.MEDIUMINT, 355 TokenType.UMEDIUMINT, 356 TokenType.FIXEDSTRING, 357 TokenType.FLOAT, 358 TokenType.DOUBLE, 359 TokenType.UDOUBLE, 360 TokenType.CHAR, 361 TokenType.NCHAR, 362 TokenType.VARCHAR, 363 TokenType.NVARCHAR, 364 TokenType.BPCHAR, 365 TokenType.TEXT, 366 TokenType.MEDIUMTEXT, 367 TokenType.LONGTEXT, 368 TokenType.BLOB, 369 TokenType.MEDIUMBLOB, 370 TokenType.LONGBLOB, 371 TokenType.BINARY, 372 TokenType.VARBINARY, 373 TokenType.JSON, 374 TokenType.JSONB, 375 TokenType.INTERVAL, 376 TokenType.TINYBLOB, 377 TokenType.TINYTEXT, 378 TokenType.TIME, 379 TokenType.TIMETZ, 380 TokenType.TIME_NS, 381 TokenType.TIMESTAMP, 382 TokenType.TIMESTAMP_S, 383 TokenType.TIMESTAMP_MS, 384 TokenType.TIMESTAMP_NS, 385 TokenType.TIMESTAMPTZ, 386 TokenType.TIMESTAMPLTZ, 387 TokenType.TIMESTAMPNTZ, 388 TokenType.DATETIME, 389 TokenType.DATETIME2, 390 TokenType.DATETIME64, 391 TokenType.SMALLDATETIME, 392 TokenType.DATE, 393 TokenType.DATE32, 394 TokenType.INT4RANGE, 395 TokenType.INT4MULTIRANGE, 396 TokenType.INT8RANGE, 397 TokenType.INT8MULTIRANGE, 398 TokenType.NUMRANGE, 399 TokenType.NUMMULTIRANGE, 400 TokenType.TSRANGE, 401 TokenType.TSMULTIRANGE, 402 TokenType.TSTZRANGE, 403 TokenType.TSTZMULTIRANGE, 404 TokenType.DATERANGE, 405 TokenType.DATEMULTIRANGE, 406 
TokenType.DECIMAL, 407 TokenType.DECIMAL32, 408 TokenType.DECIMAL64, 409 TokenType.DECIMAL128, 410 TokenType.DECIMAL256, 411 TokenType.DECFLOAT, 412 TokenType.UDECIMAL, 413 TokenType.BIGDECIMAL, 414 TokenType.UUID, 415 TokenType.GEOGRAPHY, 416 TokenType.GEOGRAPHYPOINT, 417 TokenType.GEOMETRY, 418 TokenType.POINT, 419 TokenType.RING, 420 TokenType.LINESTRING, 421 TokenType.MULTILINESTRING, 422 TokenType.POLYGON, 423 TokenType.MULTIPOLYGON, 424 TokenType.HLLSKETCH, 425 TokenType.HSTORE, 426 TokenType.PSEUDO_TYPE, 427 TokenType.SUPER, 428 TokenType.SERIAL, 429 TokenType.SMALLSERIAL, 430 TokenType.BIGSERIAL, 431 TokenType.XML, 432 TokenType.YEAR, 433 TokenType.USERDEFINED, 434 TokenType.MONEY, 435 TokenType.SMALLMONEY, 436 TokenType.ROWVERSION, 437 TokenType.IMAGE, 438 TokenType.VARIANT, 439 TokenType.VECTOR, 440 TokenType.VOID, 441 TokenType.OBJECT, 442 TokenType.OBJECT_IDENTIFIER, 443 TokenType.INET, 444 TokenType.IPADDRESS, 445 TokenType.IPPREFIX, 446 TokenType.IPV4, 447 TokenType.IPV6, 448 TokenType.UNKNOWN, 449 TokenType.NOTHING, 450 TokenType.NULL, 451 TokenType.NAME, 452 TokenType.TDIGEST, 453 TokenType.DYNAMIC, 454 *ENUM_TYPE_TOKENS, 455 *NESTED_TYPE_TOKENS, 456 *AGGREGATE_TYPE_TOKENS, 457 } 458 459 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 460 TokenType.BIGINT: TokenType.UBIGINT, 461 TokenType.INT: TokenType.UINT, 462 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 463 TokenType.SMALLINT: TokenType.USMALLINT, 464 TokenType.TINYINT: TokenType.UTINYINT, 465 TokenType.DECIMAL: TokenType.UDECIMAL, 466 TokenType.DOUBLE: TokenType.UDOUBLE, 467 } 468 469 SUBQUERY_PREDICATES = { 470 TokenType.ANY: exp.Any, 471 TokenType.ALL: exp.All, 472 TokenType.EXISTS: exp.Exists, 473 TokenType.SOME: exp.Any, 474 } 475 476 RESERVED_TOKENS = { 477 *Tokenizer.SINGLE_TOKENS.values(), 478 TokenType.SELECT, 479 } - {TokenType.IDENTIFIER} 480 481 DB_CREATABLES = { 482 TokenType.DATABASE, 483 TokenType.DICTIONARY, 484 TokenType.FILE_FORMAT, 485 TokenType.MODEL, 486 TokenType.NAMESPACE, 487 TokenType.SCHEMA, 488 TokenType.SEMANTIC_VIEW, 489 TokenType.SEQUENCE, 490 TokenType.SINK, 491 TokenType.SOURCE, 492 TokenType.STAGE, 493 TokenType.STORAGE_INTEGRATION, 494 TokenType.STREAMLIT, 495 TokenType.TABLE, 496 TokenType.TAG, 497 TokenType.VIEW, 498 TokenType.WAREHOUSE, 499 } 500 501 CREATABLES = { 502 TokenType.COLUMN, 503 TokenType.CONSTRAINT, 504 TokenType.FOREIGN_KEY, 505 TokenType.FUNCTION, 506 TokenType.INDEX, 507 TokenType.PROCEDURE, 508 *DB_CREATABLES, 509 } 510 511 ALTERABLES = { 512 TokenType.INDEX, 513 TokenType.TABLE, 514 TokenType.VIEW, 515 TokenType.SESSION, 516 } 517 518 # Tokens that can represent identifiers 519 ID_VAR_TOKENS = { 520 TokenType.ALL, 521 TokenType.ANALYZE, 522 TokenType.ATTACH, 523 TokenType.VAR, 524 TokenType.ANTI, 525 TokenType.APPLY, 526 TokenType.ASC, 527 TokenType.ASOF, 528 TokenType.AUTO_INCREMENT, 529 TokenType.BEGIN, 530 TokenType.BPCHAR, 531 TokenType.CACHE, 532 TokenType.CASE, 533 TokenType.COLLATE, 534 TokenType.COMMAND, 535 TokenType.COMMENT, 536 TokenType.COMMIT, 537 TokenType.CONSTRAINT, 538 TokenType.COPY, 539 TokenType.CUBE, 540 TokenType.CURRENT_SCHEMA, 541 TokenType.DEFAULT, 542 TokenType.DELETE, 543 TokenType.DESC, 544 TokenType.DESCRIBE, 545 TokenType.DETACH, 546 TokenType.DICTIONARY, 547 TokenType.DIV, 548 TokenType.END, 549 TokenType.EXECUTE, 550 TokenType.EXPORT, 551 TokenType.ESCAPE, 552 TokenType.FALSE, 553 TokenType.FIRST, 554 TokenType.FILTER, 555 TokenType.FINAL, 556 TokenType.FORMAT, 557 TokenType.FULL, 558 TokenType.GET, 559 TokenType.IDENTIFIER, 560 TokenType.IS, 561 
TokenType.ISNULL, 562 TokenType.INTERVAL, 563 TokenType.KEEP, 564 TokenType.KILL, 565 TokenType.LEFT, 566 TokenType.LIMIT, 567 TokenType.LOAD, 568 TokenType.LOCK, 569 TokenType.MATCH, 570 TokenType.MERGE, 571 TokenType.NATURAL, 572 TokenType.NEXT, 573 TokenType.OFFSET, 574 TokenType.OPERATOR, 575 TokenType.ORDINALITY, 576 TokenType.OVER, 577 TokenType.OVERLAPS, 578 TokenType.OVERWRITE, 579 TokenType.PARTITION, 580 TokenType.PERCENT, 581 TokenType.PIVOT, 582 TokenType.PRAGMA, 583 TokenType.PUT, 584 TokenType.RANGE, 585 TokenType.RECURSIVE, 586 TokenType.REFERENCES, 587 TokenType.REFRESH, 588 TokenType.RENAME, 589 TokenType.REPLACE, 590 TokenType.RIGHT, 591 TokenType.ROLLUP, 592 TokenType.ROW, 593 TokenType.ROWS, 594 TokenType.SEMI, 595 TokenType.SET, 596 TokenType.SETTINGS, 597 TokenType.SHOW, 598 TokenType.TEMPORARY, 599 TokenType.TOP, 600 TokenType.TRUE, 601 TokenType.TRUNCATE, 602 TokenType.UNIQUE, 603 TokenType.UNNEST, 604 TokenType.UNPIVOT, 605 TokenType.UPDATE, 606 TokenType.USE, 607 TokenType.VOLATILE, 608 TokenType.WINDOW, 609 *ALTERABLES, 610 *CREATABLES, 611 *SUBQUERY_PREDICATES, 612 *TYPE_TOKENS, 613 *NO_PAREN_FUNCTIONS, 614 } 615 ID_VAR_TOKENS.remove(TokenType.UNION) 616 617 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 618 TokenType.ANTI, 619 TokenType.ASOF, 620 TokenType.FULL, 621 TokenType.LEFT, 622 TokenType.LOCK, 623 TokenType.NATURAL, 624 TokenType.RIGHT, 625 TokenType.SEMI, 626 TokenType.WINDOW, 627 } 628 629 ALIAS_TOKENS = ID_VAR_TOKENS 630 631 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 632 633 ARRAY_CONSTRUCTORS = { 634 "ARRAY": exp.Array, 635 "LIST": exp.List, 636 } 637 638 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 639 640 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 641 642 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 643 644 FUNC_TOKENS = { 645 TokenType.COLLATE, 646 TokenType.COMMAND, 647 TokenType.CURRENT_DATE, 648 TokenType.CURRENT_DATETIME, 649 TokenType.CURRENT_SCHEMA, 650 TokenType.CURRENT_TIMESTAMP, 651 TokenType.CURRENT_TIME, 652 TokenType.CURRENT_USER, 653 TokenType.CURRENT_CATALOG, 654 TokenType.FILTER, 655 TokenType.FIRST, 656 TokenType.FORMAT, 657 TokenType.GET, 658 TokenType.GLOB, 659 TokenType.IDENTIFIER, 660 TokenType.INDEX, 661 TokenType.ISNULL, 662 TokenType.ILIKE, 663 TokenType.INSERT, 664 TokenType.LIKE, 665 TokenType.LOCALTIME, 666 TokenType.LOCALTIMESTAMP, 667 TokenType.MERGE, 668 TokenType.NEXT, 669 TokenType.OFFSET, 670 TokenType.PRIMARY_KEY, 671 TokenType.RANGE, 672 TokenType.REPLACE, 673 TokenType.RLIKE, 674 TokenType.ROW, 675 TokenType.SESSION_USER, 676 TokenType.UNNEST, 677 TokenType.VAR, 678 TokenType.LEFT, 679 TokenType.RIGHT, 680 TokenType.SEQUENCE, 681 TokenType.DATE, 682 TokenType.DATETIME, 683 TokenType.TABLE, 684 TokenType.TIMESTAMP, 685 TokenType.TIMESTAMPTZ, 686 TokenType.TRUNCATE, 687 TokenType.UTC_DATE, 688 TokenType.UTC_TIME, 689 TokenType.UTC_TIMESTAMP, 690 TokenType.WINDOW, 691 TokenType.XOR, 692 *TYPE_TOKENS, 693 *SUBQUERY_PREDICATES, 694 } 695 696 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 697 TokenType.AND: exp.And, 698 } 699 700 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 701 TokenType.COLON_EQ: exp.PropertyEQ, 702 } 703 704 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 705 TokenType.OR: exp.Or, 706 } 707 708 EQUALITY = { 709 TokenType.EQ: exp.EQ, 710 TokenType.NEQ: exp.NEQ, 711 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 712 } 713 714 COMPARISON = { 715 TokenType.GT: exp.GT, 716 TokenType.GTE: exp.GTE, 717 TokenType.LT: exp.LT, 718 TokenType.LTE: exp.LTE, 
719 } 720 721 BITWISE = { 722 TokenType.AMP: exp.BitwiseAnd, 723 TokenType.CARET: exp.BitwiseXor, 724 TokenType.PIPE: exp.BitwiseOr, 725 } 726 727 TERM = { 728 TokenType.DASH: exp.Sub, 729 TokenType.PLUS: exp.Add, 730 TokenType.MOD: exp.Mod, 731 TokenType.COLLATE: exp.Collate, 732 } 733 734 FACTOR = { 735 TokenType.DIV: exp.IntDiv, 736 TokenType.LR_ARROW: exp.Distance, 737 TokenType.SLASH: exp.Div, 738 TokenType.STAR: exp.Mul, 739 } 740 741 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 742 743 TIMES = { 744 TokenType.TIME, 745 TokenType.TIMETZ, 746 } 747 748 TIMESTAMPS = { 749 TokenType.TIMESTAMP, 750 TokenType.TIMESTAMPNTZ, 751 TokenType.TIMESTAMPTZ, 752 TokenType.TIMESTAMPLTZ, 753 *TIMES, 754 } 755 756 SET_OPERATIONS = { 757 TokenType.UNION, 758 TokenType.INTERSECT, 759 TokenType.EXCEPT, 760 } 761 762 JOIN_METHODS = { 763 TokenType.ASOF, 764 TokenType.NATURAL, 765 TokenType.POSITIONAL, 766 } 767 768 JOIN_SIDES = { 769 TokenType.LEFT, 770 TokenType.RIGHT, 771 TokenType.FULL, 772 } 773 774 JOIN_KINDS = { 775 TokenType.ANTI, 776 TokenType.CROSS, 777 TokenType.INNER, 778 TokenType.OUTER, 779 TokenType.SEMI, 780 TokenType.STRAIGHT_JOIN, 781 } 782 783 JOIN_HINTS: t.Set[str] = set() 784 785 LAMBDAS = { 786 TokenType.ARROW: lambda self, expressions: self.expression( 787 exp.Lambda, 788 this=self._replace_lambda( 789 self._parse_disjunction(), 790 expressions, 791 ), 792 expressions=expressions, 793 ), 794 TokenType.FARROW: lambda self, expressions: self.expression( 795 exp.Kwarg, 796 this=exp.var(expressions[0].name), 797 expression=self._parse_disjunction(), 798 ), 799 } 800 801 COLUMN_OPERATORS = { 802 TokenType.DOT: None, 803 TokenType.DOTCOLON: lambda self, this, to: self.expression( 804 exp.JSONCast, 805 this=this, 806 to=to, 807 ), 808 TokenType.DCOLON: lambda self, this, to: self.build_cast( 809 strict=self.STRICT_CAST, this=this, to=to 810 ), 811 TokenType.ARROW: lambda self, this, path: self.expression( 812 exp.JSONExtract, 813 this=this, 814 expression=self.dialect.to_json_path(path), 815 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 816 ), 817 TokenType.DARROW: lambda self, this, path: self.expression( 818 exp.JSONExtractScalar, 819 this=this, 820 expression=self.dialect.to_json_path(path), 821 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 822 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 823 ), 824 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 825 exp.JSONBExtract, 826 this=this, 827 expression=path, 828 ), 829 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 830 exp.JSONBExtractScalar, 831 this=this, 832 expression=path, 833 ), 834 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 835 exp.JSONBContains, 836 this=this, 837 expression=key, 838 ), 839 } 840 841 CAST_COLUMN_OPERATORS = { 842 TokenType.DOTCOLON, 843 TokenType.DCOLON, 844 } 845 846 EXPRESSION_PARSERS = { 847 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 848 exp.Column: lambda self: self._parse_column(), 849 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 850 exp.Condition: lambda self: self._parse_disjunction(), 851 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 852 exp.Expression: lambda self: self._parse_expression(), 853 exp.From: lambda self: self._parse_from(joins=True), 854 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 855 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 856 exp.Group: lambda self: 
self._parse_group(), 857 exp.Having: lambda self: self._parse_having(), 858 exp.Hint: lambda self: self._parse_hint_body(), 859 exp.Identifier: lambda self: self._parse_id_var(), 860 exp.Join: lambda self: self._parse_join(), 861 exp.Lambda: lambda self: self._parse_lambda(), 862 exp.Lateral: lambda self: self._parse_lateral(), 863 exp.Limit: lambda self: self._parse_limit(), 864 exp.Offset: lambda self: self._parse_offset(), 865 exp.Order: lambda self: self._parse_order(), 866 exp.Ordered: lambda self: self._parse_ordered(), 867 exp.Properties: lambda self: self._parse_properties(), 868 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 869 exp.Qualify: lambda self: self._parse_qualify(), 870 exp.Returning: lambda self: self._parse_returning(), 871 exp.Select: lambda self: self._parse_select(), 872 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 873 exp.Table: lambda self: self._parse_table_parts(), 874 exp.TableAlias: lambda self: self._parse_table_alias(), 875 exp.Tuple: lambda self: self._parse_value(values=False), 876 exp.Whens: lambda self: self._parse_when_matched(), 877 exp.Where: lambda self: self._parse_where(), 878 exp.Window: lambda self: self._parse_named_window(), 879 exp.With: lambda self: self._parse_with(), 880 "JOIN_TYPE": lambda self: self._parse_join_parts(), 881 } 882 883 STATEMENT_PARSERS = { 884 TokenType.ALTER: lambda self: self._parse_alter(), 885 TokenType.ANALYZE: lambda self: self._parse_analyze(), 886 TokenType.BEGIN: lambda self: self._parse_transaction(), 887 TokenType.CACHE: lambda self: self._parse_cache(), 888 TokenType.COMMENT: lambda self: self._parse_comment(), 889 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 890 TokenType.COPY: lambda self: self._parse_copy(), 891 TokenType.CREATE: lambda self: self._parse_create(), 892 TokenType.DELETE: lambda self: self._parse_delete(), 893 TokenType.DESC: lambda self: self._parse_describe(), 894 TokenType.DESCRIBE: lambda self: self._parse_describe(), 895 TokenType.DROP: lambda self: self._parse_drop(), 896 TokenType.GRANT: lambda self: self._parse_grant(), 897 TokenType.REVOKE: lambda self: self._parse_revoke(), 898 TokenType.INSERT: lambda self: self._parse_insert(), 899 TokenType.KILL: lambda self: self._parse_kill(), 900 TokenType.LOAD: lambda self: self._parse_load(), 901 TokenType.MERGE: lambda self: self._parse_merge(), 902 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 903 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 904 TokenType.REFRESH: lambda self: self._parse_refresh(), 905 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 906 TokenType.SET: lambda self: self._parse_set(), 907 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 908 TokenType.UNCACHE: lambda self: self._parse_uncache(), 909 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 910 TokenType.UPDATE: lambda self: self._parse_update(), 911 TokenType.USE: lambda self: self._parse_use(), 912 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 913 } 914 915 UNARY_PARSERS = { 916 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 917 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 918 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 919 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 920 TokenType.PIPE_SLASH: lambda self: 
self.expression(exp.Sqrt, this=self._parse_unary()), 921 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 922 } 923 924 STRING_PARSERS = { 925 TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, token=token), 926 TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token), 927 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token), 928 TokenType.STRING: lambda self, token: self.expression( 929 exp.Literal, token=token, is_string=True 930 ), 931 TokenType.UNICODE_STRING: lambda self, token: self.expression( 932 exp.UnicodeString, 933 token=token, 934 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 935 ), 936 } 937 938 NUMERIC_PARSERS = { 939 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token), 940 TokenType.BYTE_STRING: lambda self, token: self.expression( 941 exp.ByteString, 942 token=token, 943 is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None, 944 ), 945 TokenType.HEX_STRING: lambda self, token: self.expression( 946 exp.HexString, 947 token=token, 948 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 949 ), 950 TokenType.NUMBER: lambda self, token: self.expression( 951 exp.Literal, token=token, is_string=False 952 ), 953 } 954 955 PRIMARY_PARSERS = { 956 **STRING_PARSERS, 957 **NUMERIC_PARSERS, 958 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 959 TokenType.NULL: lambda self, _: self.expression(exp.Null), 960 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 961 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 962 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 963 TokenType.STAR: lambda self, _: self._parse_star_ops(), 964 } 965 966 PLACEHOLDER_PARSERS = { 967 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 968 TokenType.PARAMETER: lambda self: self._parse_parameter(), 969 TokenType.COLON: lambda self: ( 970 self.expression(exp.Placeholder, this=self._prev.text) 971 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 972 else None 973 ), 974 } 975 976 RANGE_PARSERS = { 977 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 978 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 979 TokenType.GLOB: binary_range_parser(exp.Glob), 980 TokenType.ILIKE: binary_range_parser(exp.ILike), 981 TokenType.IN: lambda self, this: self._parse_in(this), 982 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 983 TokenType.IS: lambda self, this: self._parse_is(this), 984 TokenType.LIKE: binary_range_parser(exp.Like), 985 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 986 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 987 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 988 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 989 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 990 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 991 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 992 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 993 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 994 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 995 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 996 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 997 } 998 999 PIPE_SYNTAX_TRANSFORM_PARSERS = { 1000 
"AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1001 "AS": lambda self, query: self._build_pipe_cte( 1002 query, [exp.Star()], self._parse_table_alias() 1003 ), 1004 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1005 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1006 "ORDER BY": lambda self, query: query.order_by( 1007 self._parse_order(), append=False, copy=False 1008 ), 1009 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1010 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1011 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1012 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1013 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1014 } 1015 1016 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 1017 "ALLOWED_VALUES": lambda self: self.expression( 1018 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 1019 ), 1020 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1021 "AUTO": lambda self: self._parse_auto_property(), 1022 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1023 "BACKUP": lambda self: self.expression( 1024 exp.BackupProperty, this=self._parse_var(any_token=True) 1025 ), 1026 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1027 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1028 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1029 "CHECKSUM": lambda self: self._parse_checksum(), 1030 "CLUSTER BY": lambda self: self._parse_cluster(), 1031 "CLUSTERED": lambda self: self._parse_clustered_by(), 1032 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1033 exp.CollateProperty, **kwargs 1034 ), 1035 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1036 "CONTAINS": lambda self: self._parse_contains_property(), 1037 "COPY": lambda self: self._parse_copy_property(), 1038 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1039 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1040 "DEFINER": lambda self: self._parse_definer(), 1041 "DETERMINISTIC": lambda self: self.expression( 1042 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1043 ), 1044 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1045 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1046 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 1047 "DISTKEY": lambda self: self._parse_distkey(), 1048 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1049 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1050 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1051 "ENVIRONMENT": lambda self: self.expression( 1052 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1053 ), 1054 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1055 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1056 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1057 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1058 "FREESPACE": lambda self: self._parse_freespace(), 1059 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1060 "HEAP": lambda self: 
self.expression(exp.HeapProperty), 1061 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1062 "IMMUTABLE": lambda self: self.expression( 1063 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1064 ), 1065 "INHERITS": lambda self: self.expression( 1066 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1067 ), 1068 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1069 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1070 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1071 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1072 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1073 "LIKE": lambda self: self._parse_create_like(), 1074 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1075 "LOCK": lambda self: self._parse_locking(), 1076 "LOCKING": lambda self: self._parse_locking(), 1077 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1078 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1079 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1080 "MODIFIES": lambda self: self._parse_modifies_property(), 1081 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1082 "NO": lambda self: self._parse_no_property(), 1083 "ON": lambda self: self._parse_on_property(), 1084 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1085 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1086 "PARTITION": lambda self: self._parse_partitioned_of(), 1087 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1088 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1089 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1090 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1091 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1092 "READS": lambda self: self._parse_reads_property(), 1093 "REMOTE": lambda self: self._parse_remote_with_connection(), 1094 "RETURNS": lambda self: self._parse_returns(), 1095 "STRICT": lambda self: self.expression(exp.StrictProperty), 1096 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1097 "ROW": lambda self: self._parse_row(), 1098 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1099 "SAMPLE": lambda self: self.expression( 1100 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1101 ), 1102 "SECURE": lambda self: self.expression(exp.SecureProperty), 1103 "SECURITY": lambda self: self._parse_security(), 1104 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1105 "SETTINGS": lambda self: self._parse_settings_property(), 1106 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1107 "SORTKEY": lambda self: self._parse_sortkey(), 1108 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1109 "STABLE": lambda self: self.expression( 1110 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1111 ), 1112 "STORED": lambda self: self._parse_stored(), 1113 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1114 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1115 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1116 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1117 "TO": lambda self: 
self._parse_to_table(), 1118 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1119 "TRANSFORM": lambda self: self.expression( 1120 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1121 ), 1122 "TTL": lambda self: self._parse_ttl(), 1123 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1124 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1125 "VOLATILE": lambda self: self._parse_volatile_property(), 1126 "WITH": lambda self: self._parse_with_property(), 1127 } 1128 1129 CONSTRAINT_PARSERS = { 1130 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1131 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1132 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1133 "CHARACTER SET": lambda self: self.expression( 1134 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1135 ), 1136 "CHECK": lambda self: self.expression( 1137 exp.CheckColumnConstraint, 1138 this=self._parse_wrapped(self._parse_assignment), 1139 enforced=self._match_text_seq("ENFORCED"), 1140 ), 1141 "COLLATE": lambda self: self.expression( 1142 exp.CollateColumnConstraint, 1143 this=self._parse_identifier() or self._parse_column(), 1144 ), 1145 "COMMENT": lambda self: self.expression( 1146 exp.CommentColumnConstraint, this=self._parse_string() 1147 ), 1148 "COMPRESS": lambda self: self._parse_compress(), 1149 "CLUSTERED": lambda self: self.expression( 1150 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1151 ), 1152 "NONCLUSTERED": lambda self: self.expression( 1153 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1154 ), 1155 "DEFAULT": lambda self: self.expression( 1156 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1157 ), 1158 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1159 "EPHEMERAL": lambda self: self.expression( 1160 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1161 ), 1162 "EXCLUDE": lambda self: self.expression( 1163 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1164 ), 1165 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1166 "FORMAT": lambda self: self.expression( 1167 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1168 ), 1169 "GENERATED": lambda self: self._parse_generated_as_identity(), 1170 "IDENTITY": lambda self: self._parse_auto_increment(), 1171 "INLINE": lambda self: self._parse_inline(), 1172 "LIKE": lambda self: self._parse_create_like(), 1173 "NOT": lambda self: self._parse_not_constraint(), 1174 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1175 "ON": lambda self: ( 1176 self._match(TokenType.UPDATE) 1177 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1178 ) 1179 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1180 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1181 "PERIOD": lambda self: self._parse_period_for_system_time(), 1182 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1183 "REFERENCES": lambda self: self._parse_references(match=False), 1184 "TITLE": lambda self: self.expression( 1185 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1186 ), 1187 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1188 "UNIQUE": lambda self: self._parse_unique(), 1189 "UPPERCASE": 
lambda self: self.expression(exp.UppercaseColumnConstraint), 1190 "WITH": lambda self: self.expression( 1191 exp.Properties, expressions=self._parse_wrapped_properties() 1192 ), 1193 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1194 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1195 } 1196 1197 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1198 if not self._match(TokenType.L_PAREN, advance=False): 1199 # Partitioning by bucket or truncate follows the syntax: 1200 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1201 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1202 self._retreat(self._index - 1) 1203 return None 1204 1205 klass = ( 1206 exp.PartitionedByBucket 1207 if self._prev.text.upper() == "BUCKET" 1208 else exp.PartitionByTruncate 1209 ) 1210 1211 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1212 this, expression = seq_get(args, 0), seq_get(args, 1) 1213 1214 if isinstance(this, exp.Literal): 1215 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1216 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1217 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1218 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1219 # 1220 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1221 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1222 this, expression = expression, this 1223 1224 return self.expression(klass, this=this, expression=expression) 1225 1226 ALTER_PARSERS = { 1227 "ADD": lambda self: self._parse_alter_table_add(), 1228 "AS": lambda self: self._parse_select(), 1229 "ALTER": lambda self: self._parse_alter_table_alter(), 1230 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1231 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1232 "DROP": lambda self: self._parse_alter_table_drop(), 1233 "RENAME": lambda self: self._parse_alter_table_rename(), 1234 "SET": lambda self: self._parse_alter_table_set(), 1235 "SWAP": lambda self: self.expression( 1236 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1237 ), 1238 } 1239 1240 ALTER_ALTER_PARSERS = { 1241 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1242 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1243 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1244 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1245 } 1246 1247 SCHEMA_UNNAMED_CONSTRAINTS = { 1248 "CHECK", 1249 "EXCLUDE", 1250 "FOREIGN KEY", 1251 "LIKE", 1252 "PERIOD", 1253 "PRIMARY KEY", 1254 "UNIQUE", 1255 "BUCKET", 1256 "TRUNCATE", 1257 } 1258 1259 NO_PAREN_FUNCTION_PARSERS = { 1260 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1261 "CASE": lambda self: self._parse_case(), 1262 "CONNECT_BY_ROOT": lambda self: self.expression( 1263 exp.ConnectByRoot, this=self._parse_column() 1264 ), 1265 "IF": lambda self: self._parse_if(), 1266 } 1267 1268 INVALID_FUNC_NAME_TOKENS = { 1269 TokenType.IDENTIFIER, 1270 TokenType.STRING, 1271 } 1272 1273 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1274 1275 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, 
exp.PropertyEQ, exp.Slice) 1276 1277 FUNCTION_PARSERS = { 1278 **{ 1279 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1280 }, 1281 **{ 1282 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1283 }, 1284 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1285 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1286 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1287 "CHAR": lambda self: self._parse_char(), 1288 "CHR": lambda self: self._parse_char(), 1289 "DECODE": lambda self: self._parse_decode(), 1290 "EXTRACT": lambda self: self._parse_extract(), 1291 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1292 "GAP_FILL": lambda self: self._parse_gap_fill(), 1293 "INITCAP": lambda self: self._parse_initcap(), 1294 "JSON_OBJECT": lambda self: self._parse_json_object(), 1295 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1296 "JSON_TABLE": lambda self: self._parse_json_table(), 1297 "MATCH": lambda self: self._parse_match_against(), 1298 "NORMALIZE": lambda self: self._parse_normalize(), 1299 "OPENJSON": lambda self: self._parse_open_json(), 1300 "OVERLAY": lambda self: self._parse_overlay(), 1301 "POSITION": lambda self: self._parse_position(), 1302 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1303 "STRING_AGG": lambda self: self._parse_string_agg(), 1304 "SUBSTRING": lambda self: self._parse_substring(), 1305 "TRIM": lambda self: self._parse_trim(), 1306 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1307 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1308 "XMLELEMENT": lambda self: self._parse_xml_element(), 1309 "XMLTABLE": lambda self: self._parse_xml_table(), 1310 } 1311 1312 QUERY_MODIFIER_PARSERS = { 1313 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1314 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1315 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1316 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1317 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1318 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1319 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1320 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1321 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1322 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1323 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1324 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1325 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1326 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1327 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1328 TokenType.CLUSTER_BY: lambda self: ( 1329 "cluster", 1330 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1331 ), 1332 TokenType.DISTRIBUTE_BY: lambda self: ( 1333 "distribute", 1334 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1335 ), 1336 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1337 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1338 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1339 } 1340 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1341 1342 SET_PARSERS = { 1343 
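        # How these entries are used, as a quick sketch (assuming the default dialect):
        # the keys feed SET_TRIE, which the _Parser metaclass builds from this dict, so
        # multi-word keys match longest-first and the matched entry's callable produces
        # the parsed item, e.g.
        #
        #     >>> import sqlglot
        #     >>> sqlglot.parse_one("SET GLOBAL max_connections = 100")
        #
        # dispatches to _parse_set_item_assignment("GLOBAL").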
"GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1344 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1345 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1346 "TRANSACTION": lambda self: self._parse_set_transaction(), 1347 } 1348 1349 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1350 1351 TYPE_LITERAL_PARSERS = { 1352 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1353 } 1354 1355 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1356 1357 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1358 1359 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1360 1361 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1362 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1363 "ISOLATION": ( 1364 ("LEVEL", "REPEATABLE", "READ"), 1365 ("LEVEL", "READ", "COMMITTED"), 1366 ("LEVEL", "READ", "UNCOMITTED"), 1367 ("LEVEL", "SERIALIZABLE"), 1368 ), 1369 "READ": ("WRITE", "ONLY"), 1370 } 1371 1372 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1373 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1374 ) 1375 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1376 1377 CREATE_SEQUENCE: OPTIONS_TYPE = { 1378 "SCALE": ("EXTEND", "NOEXTEND"), 1379 "SHARD": ("EXTEND", "NOEXTEND"), 1380 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1381 **dict.fromkeys( 1382 ( 1383 "SESSION", 1384 "GLOBAL", 1385 "KEEP", 1386 "NOKEEP", 1387 "ORDER", 1388 "NOORDER", 1389 "NOCACHE", 1390 "CYCLE", 1391 "NOCYCLE", 1392 "NOMINVALUE", 1393 "NOMAXVALUE", 1394 "NOSCALE", 1395 "NOSHARD", 1396 ), 1397 tuple(), 1398 ), 1399 } 1400 1401 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1402 1403 USABLES: OPTIONS_TYPE = dict.fromkeys( 1404 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1405 ) 1406 1407 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1408 1409 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1410 "TYPE": ("EVOLUTION",), 1411 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1412 } 1413 1414 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1415 1416 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1417 1418 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1419 "NOT": ("ENFORCED",), 1420 "MATCH": ( 1421 "FULL", 1422 "PARTIAL", 1423 "SIMPLE", 1424 ), 1425 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1426 "USING": ( 1427 "BTREE", 1428 "HASH", 1429 ), 1430 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1431 } 1432 1433 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1434 "NO": ("OTHERS",), 1435 "CURRENT": ("ROW",), 1436 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1437 } 1438 1439 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1440 1441 CLONE_KEYWORDS = {"CLONE", "COPY"} 1442 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1443 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1444 1445 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1446 1447 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1448 1449 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1450 1451 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1452 1453 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1454 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1455 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1456 1457 
JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1458 1459 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1460 1461 ADD_CONSTRAINT_TOKENS = { 1462 TokenType.CONSTRAINT, 1463 TokenType.FOREIGN_KEY, 1464 TokenType.INDEX, 1465 TokenType.KEY, 1466 TokenType.PRIMARY_KEY, 1467 TokenType.UNIQUE, 1468 } 1469 1470 DISTINCT_TOKENS = {TokenType.DISTINCT} 1471 1472 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1473 1474 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1475 1476 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1477 1478 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1479 1480 ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {} 1481 1482 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1483 1484 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1485 1486 # The style options for the DESCRIBE statement 1487 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1488 1489 SET_ASSIGNMENT_DELIMITERS = {"=", ":=", "TO"} 1490 1491 # The style options for the ANALYZE statement 1492 ANALYZE_STYLES = { 1493 "BUFFER_USAGE_LIMIT", 1494 "FULL", 1495 "LOCAL", 1496 "NO_WRITE_TO_BINLOG", 1497 "SAMPLE", 1498 "SKIP_LOCKED", 1499 "VERBOSE", 1500 } 1501 1502 ANALYZE_EXPRESSION_PARSERS = { 1503 "ALL": lambda self: self._parse_analyze_columns(), 1504 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1505 "DELETE": lambda self: self._parse_analyze_delete(), 1506 "DROP": lambda self: self._parse_analyze_histogram(), 1507 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1508 "LIST": lambda self: self._parse_analyze_list(), 1509 "PREDICATE": lambda self: self._parse_analyze_columns(), 1510 "UPDATE": lambda self: self._parse_analyze_histogram(), 1511 "VALIDATE": lambda self: self._parse_analyze_validate(), 1512 } 1513 1514 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1515 1516 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1517 1518 OPERATION_MODIFIERS: t.Set[str] = set() 1519 1520 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1521 1522 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1523 1524 STRICT_CAST = True 1525 1526 PREFIXED_PIVOT_COLUMNS = False 1527 IDENTIFY_PIVOT_STRINGS = False 1528 1529 LOG_DEFAULTS_TO_LN = False 1530 1531 # Whether the table sample clause expects CSV syntax 1532 TABLESAMPLE_CSV = False 1533 1534 # The default method used for table sampling 1535 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1536 1537 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1538 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1539 1540 # Whether the TRIM function expects the characters to trim as its first argument 1541 TRIM_PATTERN_FIRST = False 1542 1543 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1544 STRING_ALIASES = False 1545 1546 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1547 MODIFIERS_ATTACHED_TO_SET_OP = True 1548 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1549 1550 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1551 NO_PAREN_IF_COMMANDS = True 1552 1553 # Whether the -> and ->> operators expect documents of type JSON (e.g. 
Postgres) 1554 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1555 1556 # Whether the `:` operator is used to extract a value from a VARIANT column 1557 COLON_IS_VARIANT_EXTRACT = False 1558 1559 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1560 # If this is True and '(' is not found, the keyword will be treated as an identifier 1561 VALUES_FOLLOWED_BY_PAREN = True 1562 1563 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1564 SUPPORTS_IMPLICIT_UNNEST = False 1565 1566 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1567 INTERVAL_SPANS = True 1568 1569 # Whether a PARTITION clause can follow a table reference 1570 SUPPORTS_PARTITION_SELECTION = False 1571 1572 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1573 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1574 1575 # Whether the 'AS' keyword is optional in the CTE definition syntax 1576 OPTIONAL_ALIAS_TOKEN_CTE = True 1577 1578 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1579 ALTER_RENAME_REQUIRES_COLUMN = True 1580 1581 # Whether Alter statements are allowed to contain Partition specifications 1582 ALTER_TABLE_PARTITIONS = False 1583 1584 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1585 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1586 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1587 # as BigQuery, where all joins have the same precedence. 1588 JOINS_HAVE_EQUAL_PRECEDENCE = False 1589 1590 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1591 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1592 1593 # Whether map literals support arbitrary expressions as keys. 1594 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1595 # When False, keys are typically restricted to identifiers. 
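    # For instance, a sketch assuming the DuckDB dialect (which enables this flag),
    # using the literal shape from the comment above:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT {[1, 2]: 3}", read="duckdb")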
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0

    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
1692 """ 1693 errors = [] 1694 for expression_type in ensure_list(expression_types): 1695 parser = self.EXPRESSION_PARSERS.get(expression_type) 1696 if not parser: 1697 raise TypeError(f"No parser registered for {expression_type}") 1698 1699 try: 1700 return self._parse(parser, raw_tokens, sql) 1701 except ParseError as e: 1702 e.errors[0]["into_expression"] = expression_type 1703 errors.append(e) 1704 1705 raise ParseError( 1706 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1707 errors=merge_errors(errors), 1708 ) from errors[-1] 1709 1710 def _parse( 1711 self, 1712 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1713 raw_tokens: t.List[Token], 1714 sql: t.Optional[str] = None, 1715 ) -> t.List[t.Optional[exp.Expression]]: 1716 self.reset() 1717 self.sql = sql or "" 1718 1719 total = len(raw_tokens) 1720 chunks: t.List[t.List[Token]] = [[]] 1721 1722 for i, token in enumerate(raw_tokens): 1723 if token.token_type == TokenType.SEMICOLON: 1724 if token.comments: 1725 chunks.append([token]) 1726 1727 if i < total - 1: 1728 chunks.append([]) 1729 else: 1730 chunks[-1].append(token) 1731 1732 expressions = [] 1733 1734 for tokens in chunks: 1735 self._index = -1 1736 self._tokens = tokens 1737 self._advance() 1738 1739 expressions.append(parse_method(self)) 1740 1741 if self._index < len(self._tokens): 1742 self.raise_error("Invalid expression / Unexpected token") 1743 1744 self.check_errors() 1745 1746 return expressions 1747 1748 def check_errors(self) -> None: 1749 """Logs or raises any found errors, depending on the chosen error level setting.""" 1750 if self.error_level == ErrorLevel.WARN: 1751 for error in self.errors: 1752 logger.error(str(error)) 1753 elif self.error_level == ErrorLevel.RAISE and self.errors: 1754 raise ParseError( 1755 concat_messages(self.errors, self.max_errors), 1756 errors=merge_errors(self.errors), 1757 ) 1758 1759 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1760 """ 1761 Appends an error in the list of recorded errors or raises it, depending on the chosen 1762 error level setting. 1763 """ 1764 token = token or self._curr or self._prev or Token.string("") 1765 formatted_sql, start_context, highlight, end_context = highlight_sql( 1766 sql=self.sql, 1767 positions=[(token.start, token.end)], 1768 context_length=self.error_message_context, 1769 ) 1770 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1771 1772 error = ParseError.new( 1773 formatted_message, 1774 description=message, 1775 line=token.line, 1776 col=token.col, 1777 start_context=start_context, 1778 highlight=highlight, 1779 end_context=end_context, 1780 ) 1781 1782 if self.error_level == ErrorLevel.IMMEDIATE: 1783 raise error 1784 1785 self.errors.append(error) 1786 1787 def expression( 1788 self, 1789 exp_class: t.Type[E], 1790 token: t.Optional[Token] = None, 1791 comments: t.Optional[t.List[str]] = None, 1792 **kwargs, 1793 ) -> E: 1794 """ 1795 Creates a new, validated Expression. 1796 1797 Args: 1798 exp_class: The expression class to instantiate. 1799 comments: An optional list of comments to attach to the expression. 1800 kwargs: The arguments to set for the expression along with their respective values. 1801 1802 Returns: 1803 The target expression. 
        """
        if token:
            instance = exp_class(this=token.text, **kwargs)
            instance.update_positions(token)
        else:
            instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1990 1991 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1992 start = self._prev 1993 temporary = self._match(TokenType.TEMPORARY) 1994 materialized = self._match_text_seq("MATERIALIZED") 1995 1996 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1997 if not kind: 1998 return self._parse_as_command(start) 1999 2000 concurrently = self._match_text_seq("CONCURRENTLY") 2001 if_exists = exists or self._parse_exists() 2002 2003 if kind == "COLUMN": 2004 this = self._parse_column() 2005 else: 2006 this = self._parse_table_parts( 2007 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 2008 ) 2009 2010 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2011 2012 if self._match(TokenType.L_PAREN, advance=False): 2013 expressions = self._parse_wrapped_csv(self._parse_types) 2014 else: 2015 expressions = None 2016 2017 return self.expression( 2018 exp.Drop, 2019 exists=if_exists, 2020 this=this, 2021 expressions=expressions, 2022 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2023 temporary=temporary, 2024 materialized=materialized, 2025 cascade=self._match_text_seq("CASCADE"), 2026 constraints=self._match_text_seq("CONSTRAINTS"), 2027 purge=self._match_text_seq("PURGE"), 2028 cluster=cluster, 2029 concurrently=concurrently, 2030 ) 2031 2032 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2033 return ( 2034 self._match_text_seq("IF") 2035 and (not not_ or self._match(TokenType.NOT)) 2036 and self._match(TokenType.EXISTS) 2037 ) 2038 2039 def _parse_create(self) -> exp.Create | exp.Command: 2040 # Note: this can't be None because we've matched a statement parser 2041 start = self._prev 2042 2043 replace = ( 2044 start.token_type == TokenType.REPLACE 2045 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2046 or self._match_pair(TokenType.OR, TokenType.ALTER) 2047 ) 2048 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2049 2050 unique = self._match(TokenType.UNIQUE) 2051 2052 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2053 clustered = True 2054 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2055 "COLUMNSTORE" 2056 ): 2057 clustered = False 2058 else: 2059 clustered = None 2060 2061 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2062 self._advance() 2063 2064 properties = None 2065 create_token = self._match_set(self.CREATABLES) and self._prev 2066 2067 if not create_token: 2068 # exp.Properties.Location.POST_CREATE 2069 properties = self._parse_properties() 2070 create_token = self._match_set(self.CREATABLES) and self._prev 2071 2072 if not properties or not create_token: 2073 return self._parse_as_command(start) 2074 2075 concurrently = self._match_text_seq("CONCURRENTLY") 2076 exists = self._parse_exists(not_=True) 2077 this = None 2078 expression: t.Optional[exp.Expression] = None 2079 indexes = None 2080 no_schema_binding = None 2081 begin = None 2082 end = None 2083 clone = None 2084 2085 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2086 nonlocal properties 2087 if properties and temp_props: 2088 properties.expressions.extend(temp_props.expressions) 2089 elif temp_props: 2090 properties = temp_props 2091 2092 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2093 this = self._parse_user_defined_function(kind=create_token.token_type) 2094 2095 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative.
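            # For instance (a sketch, assuming the ClickHouse dialect): CREATE TABLE t2 AS t1
            # names an existing table rather than a SELECT, so the trailing tokens are
            # re-parsed as table parts below, with _try_parse backtracking if that fails.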
2174 if not expression and has_alias: 2175 expression = self._try_parse(self._parse_table_parts) 2176 2177 if create_token.token_type == TokenType.TABLE: 2178 # exp.Properties.Location.POST_EXPRESSION 2179 extend_props(self._parse_properties()) 2180 2181 indexes = [] 2182 while True: 2183 index = self._parse_index() 2184 2185 # exp.Properties.Location.POST_INDEX 2186 extend_props(self._parse_properties()) 2187 if not index: 2188 break 2189 else: 2190 self._match(TokenType.COMMA) 2191 indexes.append(index) 2192 elif create_token.token_type == TokenType.VIEW: 2193 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2194 no_schema_binding = True 2195 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2196 extend_props(self._parse_properties()) 2197 2198 shallow = self._match_text_seq("SHALLOW") 2199 2200 if self._match_texts(self.CLONE_KEYWORDS): 2201 copy = self._prev.text.lower() == "copy" 2202 clone = self.expression( 2203 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2204 ) 2205 2206 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2207 return self._parse_as_command(start) 2208 2209 create_kind_text = create_token.text.upper() 2210 return self.expression( 2211 exp.Create, 2212 this=this, 2213 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2214 replace=replace, 2215 refresh=refresh, 2216 unique=unique, 2217 expression=expression, 2218 exists=exists, 2219 properties=properties, 2220 indexes=indexes, 2221 no_schema_binding=no_schema_binding, 2222 begin=begin, 2223 end=end, 2224 clone=clone, 2225 concurrently=concurrently, 2226 clustered=clustered, 2227 ) 2228 2229 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2230 seq = exp.SequenceProperties() 2231 2232 options = [] 2233 index = self._index 2234 2235 while self._curr: 2236 self._match(TokenType.COMMA) 2237 if self._match_text_seq("INCREMENT"): 2238 self._match_text_seq("BY") 2239 self._match_text_seq("=") 2240 seq.set("increment", self._parse_term()) 2241 elif self._match_text_seq("MINVALUE"): 2242 seq.set("minvalue", self._parse_term()) 2243 elif self._match_text_seq("MAXVALUE"): 2244 seq.set("maxvalue", self._parse_term()) 2245 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2246 self._match_text_seq("=") 2247 seq.set("start", self._parse_term()) 2248 elif self._match_text_seq("CACHE"): 2249 # T-SQL allows empty CACHE which is initialized dynamically 2250 seq.set("cache", self._parse_number() or True) 2251 elif self._match_text_seq("OWNED", "BY"): 2252 # "OWNED BY NONE" is the default 2253 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2254 else: 2255 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2256 if opt: 2257 options.append(opt) 2258 else: 2259 break 2260 2261 seq.set("options", options if options else None) 2262 return None if self._index == index else seq 2263 2264 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2265 # only used for teradata currently 2266 self._match(TokenType.COMMA) 2267 2268 kwargs = { 2269 "no": self._match_text_seq("NO"), 2270 "dual": self._match_text_seq("DUAL"), 2271 "before": self._match_text_seq("BEFORE"), 2272 "default": self._match_text_seq("DEFAULT"), 2273 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2274 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2275 "after": self._match_text_seq("AFTER"), 2276 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2277 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2278 } 2279 2280 if self._match_texts(self.PROPERTY_PARSERS): 2281 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2282 try: 2283 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2284 except TypeError: 2285 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2286 2287 return None 2288 2289 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2290 return self._parse_wrapped_csv(self._parse_property) 2291 2292 def _parse_property(self) -> t.Optional[exp.Expression]: 2293 if self._match_texts(self.PROPERTY_PARSERS): 2294 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2295 2296 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2297 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2298 2299 if self._match_text_seq("COMPOUND", "SORTKEY"): 2300 return self._parse_sortkey(compound=True) 2301 2302 if self._match_text_seq("SQL", "SECURITY"): 2303 return self.expression( 2304 exp.SqlSecurityProperty, 2305 this=self._match_texts(("DEFINER", "INVOKER")) and self._prev.text.upper(), 2306 ) 2307 2308 index = self._index 2309 2310 seq_props = self._parse_sequence_properties() 2311 if seq_props: 2312 return seq_props 2313 2314 self._retreat(index) 2315 key = self._parse_column() 2316 2317 if not self._match(TokenType.EQ): 2318 self._retreat(index) 2319 return None 2320 2321 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2322 if isinstance(key, exp.Column): 2323 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2324 2325 value = self._parse_bitwise() or self._parse_var(any_token=True) 2326 2327 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2328 if isinstance(value, exp.Column): 2329 value = exp.var(value.name) 2330 2331 return self.expression(exp.Property, this=key, value=value) 2332 2333 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2334 if self._match_text_seq("BY"): 2335 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2336 2337 self._match(TokenType.ALIAS) 2338 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2339 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2340 2341 return self.expression( 2342 exp.FileFormatProperty, 2343 this=( 2344 self.expression( 2345 exp.InputOutputFormat, 2346 input_format=input_format, 2347 output_format=output_format, 2348 ) 2349 if input_format or output_format 2350 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2351 ), 2352 hive_format=True, 2353 ) 2354 2355 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2356 field = self._parse_field() 2357 if isinstance(field, exp.Identifier) and not field.quoted: 2358 field = exp.var(field) 2359 2360 return field 2361 2362 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2363 self._match(TokenType.EQ) 2364 self._match(TokenType.ALIAS) 2365 2366 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2367 2368 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2369 properties = [] 2370 while True: 2371 if before: 2372 prop = self._parse_property_before() 2373 else: 2374 prop = self._parse_property() 2375 if not prop: 2376 break 2377 for 
p in ensure_list(prop): 2378 properties.append(p) 2379 2380 if properties: 2381 return self.expression(exp.Properties, expressions=properties) 2382 2383 return None 2384 2385 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2386 return self.expression( 2387 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2388 ) 2389 2390 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2391 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2392 security_specifier = self._prev.text.upper() 2393 return self.expression(exp.SecurityProperty, this=security_specifier) 2394 return None 2395 2396 def _parse_settings_property(self) -> exp.SettingsProperty: 2397 return self.expression( 2398 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2399 ) 2400 2401 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2402 if self._index >= 2: 2403 pre_volatile_token = self._tokens[self._index - 2] 2404 else: 2405 pre_volatile_token = None 2406 2407 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2408 return exp.VolatileProperty() 2409 2410 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2411 2412 def _parse_retention_period(self) -> exp.Var: 2413 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2414 number = self._parse_number() 2415 number_str = f"{number} " if number else "" 2416 unit = self._parse_var(any_token=True) 2417 return exp.var(f"{number_str}{unit}") 2418 2419 def _parse_system_versioning_property( 2420 self, with_: bool = False 2421 ) -> exp.WithSystemVersioningProperty: 2422 self._match(TokenType.EQ) 2423 prop = self.expression( 2424 exp.WithSystemVersioningProperty, 2425 on=True, 2426 with_=with_, 2427 ) 2428 2429 if self._match_text_seq("OFF"): 2430 prop.set("on", False) 2431 return prop 2432 2433 self._match(TokenType.ON) 2434 if self._match(TokenType.L_PAREN): 2435 while self._curr and not self._match(TokenType.R_PAREN): 2436 if self._match_text_seq("HISTORY_TABLE", "="): 2437 prop.set("this", self._parse_table_parts()) 2438 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2439 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2440 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2441 prop.set("retention_period", self._parse_retention_period()) 2442 2443 self._match(TokenType.COMMA) 2444 2445 return prop 2446 2447 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2448 self._match(TokenType.EQ) 2449 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2450 prop = self.expression(exp.DataDeletionProperty, on=on) 2451 2452 if self._match(TokenType.L_PAREN): 2453 while self._curr and not self._match(TokenType.R_PAREN): 2454 if self._match_text_seq("FILTER_COLUMN", "="): 2455 prop.set("filter_column", self._parse_column()) 2456 elif self._match_text_seq("RETENTION_PERIOD", "="): 2457 prop.set("retention_period", self._parse_retention_period()) 2458 2459 self._match(TokenType.COMMA) 2460 2461 return prop 2462 2463 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2464 kind = "HASH" 2465 expressions: t.Optional[t.List[exp.Expression]] = None 2466 if self._match_text_seq("BY", "HASH"): 2467 expressions = self._parse_wrapped_csv(self._parse_id_var) 2468 elif self._match_text_seq("BY", "RANDOM"): 2469 kind = "RANDOM" 2470 2471 # If the BUCKETS keyword is not present, the number of buckets is 
AUTO 2472 buckets: t.Optional[exp.Expression] = None 2473 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2474 buckets = self._parse_number() 2475 2476 return self.expression( 2477 exp.DistributedByProperty, 2478 expressions=expressions, 2479 kind=kind, 2480 buckets=buckets, 2481 order=self._parse_order(), 2482 ) 2483 2484 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2485 self._match_text_seq("KEY") 2486 expressions = self._parse_wrapped_id_vars() 2487 return self.expression(expr_type, expressions=expressions) 2488 2489 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2490 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2491 prop = self._parse_system_versioning_property(with_=True) 2492 self._match_r_paren() 2493 return prop 2494 2495 if self._match(TokenType.L_PAREN, advance=False): 2496 return self._parse_wrapped_properties() 2497 2498 if self._match_text_seq("JOURNAL"): 2499 return self._parse_withjournaltable() 2500 2501 if self._match_texts(self.VIEW_ATTRIBUTES): 2502 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2503 2504 if self._match_text_seq("DATA"): 2505 return self._parse_withdata(no=False) 2506 elif self._match_text_seq("NO", "DATA"): 2507 return self._parse_withdata(no=True) 2508 2509 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2510 return self._parse_serde_properties(with_=True) 2511 2512 if self._match(TokenType.SCHEMA): 2513 return self.expression( 2514 exp.WithSchemaBindingProperty, 2515 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2516 ) 2517 2518 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2519 return self.expression( 2520 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2521 ) 2522 2523 if not self._next: 2524 return None 2525 2526 return self._parse_withisolatedloading() 2527 2528 def _parse_procedure_option(self) -> exp.Expression | None: 2529 if self._match_text_seq("EXECUTE", "AS"): 2530 return self.expression( 2531 exp.ExecuteAsProperty, 2532 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2533 or self._parse_string(), 2534 ) 2535 2536 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2537 2538 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2539 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2540 self._match(TokenType.EQ) 2541 2542 user = self._parse_id_var() 2543 self._match(TokenType.PARAMETER) 2544 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2545 2546 if not user or not host: 2547 return None 2548 2549 return exp.DefinerProperty(this=f"{user}@{host}") 2550 2551 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2552 self._match(TokenType.TABLE) 2553 self._match(TokenType.EQ) 2554 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2555 2556 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2557 return self.expression(exp.LogProperty, no=no) 2558 2559 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2560 return self.expression(exp.JournalProperty, **kwargs) 2561 2562 def _parse_checksum(self) -> exp.ChecksumProperty: 2563 self._match(TokenType.EQ) 2564 2565 on = None 2566 if self._match(TokenType.ON): 2567 on = True 2568 elif self._match_text_seq("OFF"): 2569 on = False 2570 2571 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2572 2573 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2574 return self.expression( 2575 exp.Cluster, 2576 expressions=( 2577 self._parse_wrapped_csv(self._parse_ordered) 2578 if wrapped 2579 else self._parse_csv(self._parse_ordered) 2580 ), 2581 ) 2582 2583 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2584 self._match_text_seq("BY") 2585 2586 self._match_l_paren() 2587 expressions = self._parse_csv(self._parse_column) 2588 self._match_r_paren() 2589 2590 if self._match_text_seq("SORTED", "BY"): 2591 self._match_l_paren() 2592 sorted_by = self._parse_csv(self._parse_ordered) 2593 self._match_r_paren() 2594 else: 2595 sorted_by = None 2596 2597 self._match(TokenType.INTO) 2598 buckets = self._parse_number() 2599 self._match_text_seq("BUCKETS") 2600 2601 return self.expression( 2602 exp.ClusteredByProperty, 2603 expressions=expressions, 2604 sorted_by=sorted_by, 2605 buckets=buckets, 2606 ) 2607 2608 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2609 if not self._match_text_seq("GRANTS"): 2610 self._retreat(self._index - 1) 2611 return None 2612 2613 return self.expression(exp.CopyGrantsProperty) 2614 2615 def _parse_freespace(self) -> exp.FreespaceProperty: 2616 self._match(TokenType.EQ) 2617 return self.expression( 2618 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2619 ) 2620 2621 def _parse_mergeblockratio( 2622 self, no: bool = False, default: bool = False 2623 ) -> exp.MergeBlockRatioProperty: 2624 if self._match(TokenType.EQ): 2625 return self.expression( 2626 exp.MergeBlockRatioProperty, 2627 this=self._parse_number(), 2628 percent=self._match(TokenType.PERCENT), 2629 ) 2630 2631 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2632 2633 def _parse_datablocksize( 2634 self, 2635 default: t.Optional[bool] = None, 2636 minimum: t.Optional[bool] = None, 2637 maximum: t.Optional[bool] = None, 2638 ) -> exp.DataBlocksizeProperty: 2639 self._match(TokenType.EQ) 2640 size = self._parse_number() 2641 2642 units = None 2643 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2644 units = self._prev.text 2645 2646 return self.expression( 2647 exp.DataBlocksizeProperty, 2648 size=size, 2649 units=units, 2650 default=default, 2651 minimum=minimum, 2652 maximum=maximum, 2653 ) 2654 2655 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2656 self._match(TokenType.EQ) 2657 always = self._match_text_seq("ALWAYS") 2658 manual = self._match_text_seq("MANUAL") 2659 never = self._match_text_seq("NEVER") 2660 default = self._match_text_seq("DEFAULT") 2661 2662 autotemp = None 2663 if self._match_text_seq("AUTOTEMP"): 2664 autotemp = self._parse_schema() 2665 2666 return self.expression( 2667 exp.BlockCompressionProperty, 2668 always=always, 2669 manual=manual, 2670 never=never, 2671 default=default, 2672 autotemp=autotemp, 2673 ) 2674 2675 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2676 index = self._index 2677 no = self._match_text_seq("NO") 2678 concurrent = self._match_text_seq("CONCURRENT") 2679 2680 if not self._match_text_seq("ISOLATED", "LOADING"): 2681 self._retreat(index) 2682 return None 2683 2684 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2685 return self.expression( 2686 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2687 ) 2688 2689 def _parse_locking(self) -> exp.LockingProperty: 2690 if self._match(TokenType.TABLE): 2691 kind = "TABLE" 2692 elif 
self._match(TokenType.VIEW): 2693 kind = "VIEW" 2694 elif self._match(TokenType.ROW): 2695 kind = "ROW" 2696 elif self._match_text_seq("DATABASE"): 2697 kind = "DATABASE" 2698 else: 2699 kind = None 2700 2701 if kind in ("DATABASE", "TABLE", "VIEW"): 2702 this = self._parse_table_parts() 2703 else: 2704 this = None 2705 2706 if self._match(TokenType.FOR): 2707 for_or_in = "FOR" 2708 elif self._match(TokenType.IN): 2709 for_or_in = "IN" 2710 else: 2711 for_or_in = None 2712 2713 if self._match_text_seq("ACCESS"): 2714 lock_type = "ACCESS" 2715 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2716 lock_type = "EXCLUSIVE" 2717 elif self._match_text_seq("SHARE"): 2718 lock_type = "SHARE" 2719 elif self._match_text_seq("READ"): 2720 lock_type = "READ" 2721 elif self._match_text_seq("WRITE"): 2722 lock_type = "WRITE" 2723 elif self._match_text_seq("CHECKSUM"): 2724 lock_type = "CHECKSUM" 2725 else: 2726 lock_type = None 2727 2728 override = self._match_text_seq("OVERRIDE") 2729 2730 return self.expression( 2731 exp.LockingProperty, 2732 this=this, 2733 kind=kind, 2734 for_or_in=for_or_in, 2735 lock_type=lock_type, 2736 override=override, 2737 ) 2738 2739 def _parse_partition_by(self) -> t.List[exp.Expression]: 2740 if self._match(TokenType.PARTITION_BY): 2741 return self._parse_csv(self._parse_disjunction) 2742 return [] 2743 2744 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2745 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2746 if self._match_text_seq("MINVALUE"): 2747 return exp.var("MINVALUE") 2748 if self._match_text_seq("MAXVALUE"): 2749 return exp.var("MAXVALUE") 2750 return self._parse_bitwise() 2751 2752 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2753 expression = None 2754 from_expressions = None 2755 to_expressions = None 2756 2757 if self._match(TokenType.IN): 2758 this = self._parse_wrapped_csv(self._parse_bitwise) 2759 elif self._match(TokenType.FROM): 2760 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2761 self._match_text_seq("TO") 2762 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2763 elif self._match_text_seq("WITH", "(", "MODULUS"): 2764 this = self._parse_number() 2765 self._match_text_seq(",", "REMAINDER") 2766 expression = self._parse_number() 2767 self._match_r_paren() 2768 else: 2769 self.raise_error("Failed to parse partition bound spec.") 2770 2771 return self.expression( 2772 exp.PartitionBoundSpec, 2773 this=this, 2774 expression=expression, 2775 from_expressions=from_expressions, 2776 to_expressions=to_expressions, 2777 ) 2778 2779 # https://www.postgresql.org/docs/current/sql-createtable.html 2780 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2781 if not self._match_text_seq("OF"): 2782 self._retreat(self._index - 1) 2783 return None 2784 2785 this = self._parse_table(schema=True) 2786 2787 if self._match(TokenType.DEFAULT): 2788 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2789 elif self._match_text_seq("FOR", "VALUES"): 2790 expression = self._parse_partition_bound_spec() 2791 else: 2792 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2793 2794 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2795 2796 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2797 self._match(TokenType.EQ) 2798 return self.expression( 2799 exp.PartitionedByProperty, 2800 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2801 ) 2802 2803 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2804 if self._match_text_seq("AND", "STATISTICS"): 2805 statistics = True 2806 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2807 statistics = False 2808 else: 2809 statistics = None 2810 2811 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2812 2813 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2814 if self._match_text_seq("SQL"): 2815 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2816 return None 2817 2818 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2819 if self._match_text_seq("SQL", "DATA"): 2820 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2821 return None 2822 2823 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2824 if self._match_text_seq("PRIMARY", "INDEX"): 2825 return exp.NoPrimaryIndexProperty() 2826 if self._match_text_seq("SQL"): 2827 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2828 return None 2829 2830 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2831 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2832 return exp.OnCommitProperty() 2833 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2834 return exp.OnCommitProperty(delete=True) 2835 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2836 2837 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2838 if self._match_text_seq("SQL", "DATA"): 2839 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2840 return None 2841 2842 def _parse_distkey(self) -> exp.DistKeyProperty: 2843 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2844 2845 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2846 table = self._parse_table(schema=True) 2847 2848 options = [] 2849 while self._match_texts(("INCLUDING", "EXCLUDING")): 2850 this = self._prev.text.upper() 2851 2852 id_var = self._parse_id_var() 2853 if not id_var: 2854 return None 2855 2856 options.append( 2857 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2858 ) 2859 2860 return self.expression(exp.LikeProperty, this=table, expressions=options) 2861 2862 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2863 return self.expression( 2864 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2865 ) 2866 2867 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2868 self._match(TokenType.EQ) 2869 return self.expression( 2870 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2871 ) 2872 2873 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2874 self._match_text_seq("WITH", "CONNECTION") 2875 return self.expression( 2876 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2877 ) 2878 2879 def _parse_returns(self) -> exp.ReturnsProperty: 2880 value: t.Optional[exp.Expression] 2881 null = None 2882 is_table = self._match(TokenType.TABLE) 2883 2884 if is_table: 2885 if self._match(TokenType.LT): 2886 value = self.expression( 2887 exp.Schema, 2888 this="TABLE", 2889 expressions=self._parse_csv(self._parse_struct_types), 2890 ) 2891 if not self._match(TokenType.GT): 2892 self.raise_error("Expecting >") 2893 else: 2894 value = self._parse_schema(exp.var("TABLE")) 2895 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
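            # Handles the tail of RETURNS NULL ON NULL INPUT (the RETURNS token was already
            # consumed by the caller), as in e.g. Postgres's function attribute:
            # CREATE FUNCTION ... RETURNS NULL ON NULL INPUT (a sketch of the matched shape)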
2896 null = True 2897 value = None 2898 else: 2899 value = self._parse_types() 2900 2901 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2902 2903 def _parse_describe(self) -> exp.Describe: 2904 kind = self._match_set(self.CREATABLES) and self._prev.text 2905 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2906 if self._match(TokenType.DOT): 2907 style = None 2908 self._retreat(self._index - 2) 2909 2910 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2911 2912 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2913 this = self._parse_statement() 2914 else: 2915 this = self._parse_table(schema=True) 2916 2917 properties = self._parse_properties() 2918 expressions = properties.expressions if properties else None 2919 partition = self._parse_partition() 2920 return self.expression( 2921 exp.Describe, 2922 this=this, 2923 style=style, 2924 kind=kind, 2925 expressions=expressions, 2926 partition=partition, 2927 format=format, 2928 ) 2929 2930 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2931 kind = self._prev.text.upper() 2932 expressions = [] 2933 2934 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2935 if self._match(TokenType.WHEN): 2936 expression = self._parse_disjunction() 2937 self._match(TokenType.THEN) 2938 else: 2939 expression = None 2940 2941 else_ = self._match(TokenType.ELSE) 2942 2943 if not self._match(TokenType.INTO): 2944 return None 2945 2946 return self.expression( 2947 exp.ConditionalInsert, 2948 this=self.expression( 2949 exp.Insert, 2950 this=self._parse_table(schema=True), 2951 expression=self._parse_derived_table_values(), 2952 ), 2953 expression=expression, 2954 else_=else_, 2955 ) 2956 2957 expression = parse_conditional_insert() 2958 while expression is not None: 2959 expressions.append(expression) 2960 expression = parse_conditional_insert() 2961 2962 return self.expression( 2963 exp.MultitableInserts, 2964 kind=kind, 2965 comments=comments, 2966 expressions=expressions, 2967 source=self._parse_table(), 2968 ) 2969 2970 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2971 comments = [] 2972 hint = self._parse_hint() 2973 overwrite = self._match(TokenType.OVERWRITE) 2974 ignore = self._match(TokenType.IGNORE) 2975 local = self._match_text_seq("LOCAL") 2976 alternative = None 2977 is_function = None 2978 2979 if self._match_text_seq("DIRECTORY"): 2980 this: t.Optional[exp.Expression] = self.expression( 2981 exp.Directory, 2982 this=self._parse_var_or_string(), 2983 local=local, 2984 row_format=self._parse_row_format(match_row=True), 2985 ) 2986 else: 2987 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2988 comments += ensure_list(self._prev_comments) 2989 return self._parse_multitable_inserts(comments) 2990 2991 if self._match(TokenType.OR): 2992 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2993 2994 self._match(TokenType.INTO) 2995 comments += ensure_list(self._prev_comments) 2996 self._match(TokenType.TABLE) 2997 is_function = self._match(TokenType.FUNCTION) 2998 2999 this = self._parse_function() if is_function else self._parse_insert_table() 3000 3001 returning = self._parse_returning() # TSQL allows RETURNING before source 3002 3003 return self.expression( 3004 exp.Insert, 3005 comments=comments, 3006 hint=hint, 3007 is_function=is_function, 3008 this=this, 3009 stored=self._match_text_seq("STORED") and 
self._parse_stored(), 3010 by_name=self._match_text_seq("BY", "NAME"), 3011 exists=self._parse_exists(), 3012 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) 3013 and self._parse_disjunction(), 3014 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 3015 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 3016 default=self._match_text_seq("DEFAULT", "VALUES"), 3017 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 3018 conflict=self._parse_on_conflict(), 3019 returning=returning or self._parse_returning(), 3020 overwrite=overwrite, 3021 alternative=alternative, 3022 ignore=ignore, 3023 source=self._match(TokenType.TABLE) and self._parse_table(), 3024 ) 3025 3026 def _parse_insert_table(self) -> t.Optional[exp.Expression]: 3027 this = self._parse_table(schema=True, parse_partition=True) 3028 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 3029 this.set("alias", self._parse_table_alias()) 3030 return this 3031 3032 def _parse_kill(self) -> exp.Kill: 3033 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 3034 3035 return self.expression( 3036 exp.Kill, 3037 this=self._parse_primary(), 3038 kind=kind, 3039 ) 3040 3041 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 3042 conflict = self._match_text_seq("ON", "CONFLICT") 3043 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 3044 3045 if not conflict and not duplicate: 3046 return None 3047 3048 conflict_keys = None 3049 constraint = None 3050 3051 if conflict: 3052 if self._match_text_seq("ON", "CONSTRAINT"): 3053 constraint = self._parse_id_var() 3054 elif self._match(TokenType.L_PAREN): 3055 conflict_keys = self._parse_csv(self._parse_id_var) 3056 self._match_r_paren() 3057 3058 index_predicate = self._parse_where() 3059 3060 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 3061 if self._prev.token_type == TokenType.UPDATE: 3062 self._match(TokenType.SET) 3063 expressions = self._parse_csv(self._parse_equality) 3064 else: 3065 expressions = None 3066 3067 return self.expression( 3068 exp.OnConflict, 3069 duplicate=duplicate, 3070 expressions=expressions, 3071 action=action, 3072 conflict_keys=conflict_keys, 3073 index_predicate=index_predicate, 3074 constraint=constraint, 3075 where=self._parse_where(), 3076 ) 3077 3078 def _parse_returning(self) -> t.Optional[exp.Returning]: 3079 if not self._match(TokenType.RETURNING): 3080 return None 3081 return self.expression( 3082 exp.Returning, 3083 expressions=self._parse_csv(self._parse_expression), 3084 into=self._match(TokenType.INTO) and self._parse_table_part(), 3085 ) 3086 3087 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3088 if not self._match(TokenType.FORMAT): 3089 return None 3090 return self._parse_row_format() 3091 3092 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3093 index = self._index 3094 with_ = with_ or self._match_text_seq("WITH") 3095 3096 if not self._match(TokenType.SERDE_PROPERTIES): 3097 self._retreat(index) 3098 return None 3099 return self.expression( 3100 exp.SerdeProperties, 3101 expressions=self._parse_wrapped_properties(), 3102 with_=with_, 3103 ) 3104 3105 def _parse_row_format( 3106 self, match_row: bool = False 3107 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3108 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3109 
return None 3110 3111 if self._match_text_seq("SERDE"): 3112 this = self._parse_string() 3113 3114 serde_properties = self._parse_serde_properties() 3115 3116 return self.expression( 3117 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3118 ) 3119 3120 self._match_text_seq("DELIMITED") 3121 3122 kwargs = {} 3123 3124 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3125 kwargs["fields"] = self._parse_string() 3126 if self._match_text_seq("ESCAPED", "BY"): 3127 kwargs["escaped"] = self._parse_string() 3128 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3129 kwargs["collection_items"] = self._parse_string() 3130 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3131 kwargs["map_keys"] = self._parse_string() 3132 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3133 kwargs["lines"] = self._parse_string() 3134 if self._match_text_seq("NULL", "DEFINED", "AS"): 3135 kwargs["null"] = self._parse_string() 3136 3137 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3138 3139 def _parse_load(self) -> exp.LoadData | exp.Command: 3140 if self._match_text_seq("DATA"): 3141 local = self._match_text_seq("LOCAL") 3142 self._match_text_seq("INPATH") 3143 inpath = self._parse_string() 3144 overwrite = self._match(TokenType.OVERWRITE) 3145 self._match_pair(TokenType.INTO, TokenType.TABLE) 3146 3147 return self.expression( 3148 exp.LoadData, 3149 this=self._parse_table(schema=True), 3150 local=local, 3151 overwrite=overwrite, 3152 inpath=inpath, 3153 partition=self._parse_partition(), 3154 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3155 serde=self._match_text_seq("SERDE") and self._parse_string(), 3156 ) 3157 return self._parse_as_command(self._prev) 3158 3159 def _parse_delete(self) -> exp.Delete: 3160 # This handles MySQL's "Multiple-Table Syntax" 3161 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3162 tables = None 3163 if not self._match(TokenType.FROM, advance=False): 3164 tables = self._parse_csv(self._parse_table) or None 3165 3166 returning = self._parse_returning() 3167 3168 return self.expression( 3169 exp.Delete, 3170 tables=tables, 3171 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3172 using=self._match(TokenType.USING) 3173 and self._parse_csv(lambda: self._parse_table(joins=True)), 3174 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3175 where=self._parse_where(), 3176 returning=returning or self._parse_returning(), 3177 order=self._parse_order(), 3178 limit=self._parse_limit(), 3179 ) 3180 3181 def _parse_update(self) -> exp.Update: 3182 kwargs: t.Dict[str, t.Any] = { 3183 "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS), 3184 } 3185 while self._curr: 3186 if self._match(TokenType.SET): 3187 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3188 elif self._match(TokenType.RETURNING, advance=False): 3189 kwargs["returning"] = self._parse_returning() 3190 elif self._match(TokenType.FROM, advance=False): 3191 from_ = self._parse_from(joins=True) 3192 table = from_.this if from_ else None 3193 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3194 table.set("joins", list(self._parse_joins()) or None) 3195 3196 kwargs["from_"] = from_ 3197 elif self._match(TokenType.WHERE, advance=False): 3198 kwargs["where"] = self._parse_where() 3199 elif self._match(TokenType.ORDER_BY, advance=False): 3200 kwargs["order"] = self._parse_order() 3201 elif 
self._match(TokenType.LIMIT, advance=False):
                kwargs["limit"] = self._parse_limit()
            else:
                break

        return self.expression(exp.Update, **kwargs)

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_disjunction),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from_"):
                    select.set("from_", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
                this = self._parse_query_modifiers(self._parse_set_operations(this))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. a join) follows
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            while isinstance(this, exp.Subquery) and this.is_wrapper:
                this = this.this

            if "with_" in this.arg_types:
                this.set("with_", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # DuckDB supports a leading FROM clause, e.g. FROM x SELECT y
        from_ = (
            self._parse_from(joins=True, consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and self._next.token_type != TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from_", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)
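
            # e.g. both DuckDB's FROM-first form "(FROM t SELECT a)" and a plain
            # "(SELECT a FROM t)" are handled by _parse_wrapped_select above
            # (an illustrative note, not an exhaustive list of accepted shapes)

            # We return early here so that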
the UNION isn't attached to the subquery by the 3427 # following call to _parse_set_operations, but instead becomes the parent node 3428 self._match_r_paren() 3429 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3430 elif self._match(TokenType.VALUES, advance=False): 3431 this = self._parse_derived_table_values() 3432 elif from_: 3433 this = exp.select("*").from_(from_.this, copy=False) 3434 elif self._match(TokenType.SUMMARIZE): 3435 table = self._match(TokenType.TABLE) 3436 this = self._parse_select() or self._parse_string() or self._parse_table() 3437 return self.expression(exp.Summarize, this=this, table=table) 3438 elif self._match(TokenType.DESCRIBE): 3439 this = self._parse_describe() 3440 else: 3441 this = None 3442 3443 return self._parse_set_operations(this) if parse_set_operation else this 3444 3445 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3446 self._match_text_seq("SEARCH") 3447 3448 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3449 3450 if not kind: 3451 return None 3452 3453 self._match_text_seq("FIRST", "BY") 3454 3455 return self.expression( 3456 exp.RecursiveWithSearch, 3457 kind=kind, 3458 this=self._parse_id_var(), 3459 expression=self._match_text_seq("SET") and self._parse_id_var(), 3460 using=self._match_text_seq("USING") and self._parse_id_var(), 3461 ) 3462 3463 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3464 if not skip_with_token and not self._match(TokenType.WITH): 3465 return None 3466 3467 comments = self._prev_comments 3468 recursive = self._match(TokenType.RECURSIVE) 3469 3470 last_comments = None 3471 expressions = [] 3472 while True: 3473 cte = self._parse_cte() 3474 if isinstance(cte, exp.CTE): 3475 expressions.append(cte) 3476 if last_comments: 3477 cte.add_comments(last_comments) 3478 3479 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3480 break 3481 else: 3482 self._match(TokenType.WITH) 3483 3484 last_comments = self._prev_comments 3485 3486 return self.expression( 3487 exp.With, 3488 comments=comments, 3489 expressions=expressions, 3490 recursive=recursive, 3491 search=self._parse_recursive_with_search(), 3492 ) 3493 3494 def _parse_cte(self) -> t.Optional[exp.CTE]: 3495 index = self._index 3496 3497 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3498 if not alias or not alias.this: 3499 self.raise_error("Expected CTE to have alias") 3500 3501 key_expressions = ( 3502 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 3503 ) 3504 3505 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3506 self._retreat(index) 3507 return None 3508 3509 comments = self._prev_comments 3510 3511 if self._match_text_seq("NOT", "MATERIALIZED"): 3512 materialized = False 3513 elif self._match_text_seq("MATERIALIZED"): 3514 materialized = True 3515 else: 3516 materialized = None 3517 3518 cte = self.expression( 3519 exp.CTE, 3520 this=self._parse_wrapped(self._parse_statement), 3521 alias=alias, 3522 materialized=materialized, 3523 key_expressions=key_expressions, 3524 comments=comments, 3525 ) 3526 3527 values = cte.this 3528 if isinstance(values, exp.Values): 3529 if values.alias: 3530 cte.set("this", exp.select("*").from_(values)) 3531 else: 3532 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3533 3534 return cte 3535 3536 def _parse_table_alias( 3537 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3538 ) -> 
t.Optional[exp.TableAlias]: 3539 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3540 # so this section tries to parse the clause version and if it fails, it treats the token 3541 # as an identifier (alias) 3542 if self._can_parse_limit_or_offset(): 3543 return None 3544 3545 any_token = self._match(TokenType.ALIAS) 3546 alias = ( 3547 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3548 or self._parse_string_as_identifier() 3549 ) 3550 3551 index = self._index 3552 if self._match(TokenType.L_PAREN): 3553 columns = self._parse_csv(self._parse_function_parameter) 3554 self._match_r_paren() if columns else self._retreat(index) 3555 else: 3556 columns = None 3557 3558 if not alias and not columns: 3559 return None 3560 3561 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3562 3563 # We bubble up comments from the Identifier to the TableAlias 3564 if isinstance(alias, exp.Identifier): 3565 table_alias.add_comments(alias.pop_comments()) 3566 3567 return table_alias 3568 3569 def _parse_subquery( 3570 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3571 ) -> t.Optional[exp.Subquery]: 3572 if not this: 3573 return None 3574 3575 return self.expression( 3576 exp.Subquery, 3577 this=this, 3578 pivots=self._parse_pivots(), 3579 alias=self._parse_table_alias() if parse_alias else None, 3580 sample=self._parse_table_sample(), 3581 ) 3582 3583 def _implicit_unnests_to_explicit(self, this: E) -> E: 3584 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3585 3586 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 3587 for i, join in enumerate(this.args.get("joins") or []): 3588 table = join.this 3589 normalized_table = table.copy() 3590 normalized_table.meta["maybe_column"] = True 3591 normalized_table = _norm(normalized_table, dialect=self.dialect) 3592 3593 if isinstance(table, exp.Table) and not join.args.get("on"): 3594 if normalized_table.parts[0].name in refs: 3595 table_as_column = table.to_column() 3596 unnest = exp.Unnest(expressions=[table_as_column]) 3597 3598 # Table.to_column creates a parent Alias node that we want to convert to 3599 # a TableAlias and attach to the Unnest, so it matches the parser's output 3600 if isinstance(table.args.get("alias"), exp.TableAlias): 3601 table_as_column.replace(table_as_column.this) 3602 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3603 3604 table.replace(unnest) 3605 3606 refs.add(normalized_table.alias_or_name) 3607 3608 return this 3609 3610 @t.overload 3611 def _parse_query_modifiers(self, this: E) -> E: ... 3612 3613 @t.overload 3614 def _parse_query_modifiers(self, this: None) -> None: ... 
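
    # A hedged usage sketch of the modifier loop below (the keys follow this
    # module's arg names; exact AST shapes can vary by dialect):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT * FROM t WHERE x > 1 ORDER BY y LIMIT 2")
    #   [k for k in ("where", "order", "limit") if ast.args.get(k)]
    #   # -> ['where', 'order', 'limit'], each attached by one QUERY_MODIFIER_PARSERS entry
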
3615 3616 def _parse_query_modifiers(self, this): 3617 if isinstance(this, self.MODIFIABLES): 3618 for join in self._parse_joins(): 3619 this.append("joins", join) 3620 for lateral in iter(self._parse_lateral, None): 3621 this.append("laterals", lateral) 3622 3623 while True: 3624 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3625 modifier_token = self._curr 3626 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3627 key, expression = parser(self) 3628 3629 if expression: 3630 if this.args.get(key): 3631 self.raise_error( 3632 f"Found multiple '{modifier_token.text.upper()}' clauses", 3633 token=modifier_token, 3634 ) 3635 3636 this.set(key, expression) 3637 if key == "limit": 3638 offset = expression.args.get("offset") 3639 expression.set("offset", None) 3640 3641 if offset: 3642 offset = exp.Offset(expression=offset) 3643 this.set("offset", offset) 3644 3645 limit_by_expressions = expression.expressions 3646 expression.set("expressions", None) 3647 offset.set("expressions", limit_by_expressions) 3648 continue 3649 break 3650 3651 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 3652 this = self._implicit_unnests_to_explicit(this) 3653 3654 return this 3655 3656 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3657 start = self._curr 3658 while self._curr: 3659 self._advance() 3660 3661 end = self._tokens[self._index - 1] 3662 return exp.Hint(expressions=[self._find_sql(start, end)]) 3663 3664 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3665 return self._parse_function_call() 3666 3667 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3668 start_index = self._index 3669 should_fallback_to_string = False 3670 3671 hints = [] 3672 try: 3673 for hint in iter( 3674 lambda: self._parse_csv( 3675 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3676 ), 3677 [], 3678 ): 3679 hints.extend(hint) 3680 except ParseError: 3681 should_fallback_to_string = True 3682 3683 if should_fallback_to_string or self._curr: 3684 self._retreat(start_index) 3685 return self._parse_hint_fallback_to_string() 3686 3687 return self.expression(exp.Hint, expressions=hints) 3688 3689 def _parse_hint(self) -> t.Optional[exp.Hint]: 3690 if self._match(TokenType.HINT) and self._prev_comments: 3691 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3692 3693 return None 3694 3695 def _parse_into(self) -> t.Optional[exp.Into]: 3696 if not self._match(TokenType.INTO): 3697 return None 3698 3699 temp = self._match(TokenType.TEMPORARY) 3700 unlogged = self._match_text_seq("UNLOGGED") 3701 self._match(TokenType.TABLE) 3702 3703 return self.expression( 3704 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3705 ) 3706 3707 def _parse_from( 3708 self, 3709 joins: bool = False, 3710 skip_from_token: bool = False, 3711 consume_pipe: bool = False, 3712 ) -> t.Optional[exp.From]: 3713 if not skip_from_token and not self._match(TokenType.FROM): 3714 return None 3715 3716 return self.expression( 3717 exp.From, 3718 comments=self._prev_comments, 3719 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3720 ) 3721 3722 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3723 return self.expression( 3724 exp.MatchRecognizeMeasure, 3725 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3726 this=self._parse_expression(), 3727 ) 3728 3729 def _parse_match_recognize(self) -> 
t.Optional[exp.MatchRecognize]: 3730 if not self._match(TokenType.MATCH_RECOGNIZE): 3731 return None 3732 3733 self._match_l_paren() 3734 3735 partition = self._parse_partition_by() 3736 order = self._parse_order() 3737 3738 measures = ( 3739 self._parse_csv(self._parse_match_recognize_measure) 3740 if self._match_text_seq("MEASURES") 3741 else None 3742 ) 3743 3744 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3745 rows = exp.var("ONE ROW PER MATCH") 3746 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3747 text = "ALL ROWS PER MATCH" 3748 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3749 text += " SHOW EMPTY MATCHES" 3750 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3751 text += " OMIT EMPTY MATCHES" 3752 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3753 text += " WITH UNMATCHED ROWS" 3754 rows = exp.var(text) 3755 else: 3756 rows = None 3757 3758 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3759 text = "AFTER MATCH SKIP" 3760 if self._match_text_seq("PAST", "LAST", "ROW"): 3761 text += " PAST LAST ROW" 3762 elif self._match_text_seq("TO", "NEXT", "ROW"): 3763 text += " TO NEXT ROW" 3764 elif self._match_text_seq("TO", "FIRST"): 3765 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3766 elif self._match_text_seq("TO", "LAST"): 3767 text += f" TO LAST {self._advance_any().text}" # type: ignore 3768 after = exp.var(text) 3769 else: 3770 after = None 3771 3772 if self._match_text_seq("PATTERN"): 3773 self._match_l_paren() 3774 3775 if not self._curr: 3776 self.raise_error("Expecting )", self._curr) 3777 3778 paren = 1 3779 start = self._curr 3780 3781 while self._curr and paren > 0: 3782 if self._curr.token_type == TokenType.L_PAREN: 3783 paren += 1 3784 if self._curr.token_type == TokenType.R_PAREN: 3785 paren -= 1 3786 3787 end = self._prev 3788 self._advance() 3789 3790 if paren > 0: 3791 self.raise_error("Expecting )", self._curr) 3792 3793 pattern = exp.var(self._find_sql(start, end)) 3794 else: 3795 pattern = None 3796 3797 define = ( 3798 self._parse_csv(self._parse_name_as_expression) 3799 if self._match_text_seq("DEFINE") 3800 else None 3801 ) 3802 3803 self._match_r_paren() 3804 3805 return self.expression( 3806 exp.MatchRecognize, 3807 partition_by=partition, 3808 order=order, 3809 measures=measures, 3810 rows=rows, 3811 after=after, 3812 pattern=pattern, 3813 define=define, 3814 alias=self._parse_table_alias(), 3815 ) 3816 3817 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3818 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3819 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3820 cross_apply = False 3821 3822 if cross_apply is not None: 3823 this = self._parse_select(table=True) 3824 view = None 3825 outer = None 3826 elif self._match(TokenType.LATERAL): 3827 this = self._parse_select(table=True) 3828 view = self._match(TokenType.VIEW) 3829 outer = self._match(TokenType.OUTER) 3830 else: 3831 return None 3832 3833 if not this: 3834 this = ( 3835 self._parse_unnest() 3836 or self._parse_function() 3837 or self._parse_id_var(any_token=False) 3838 ) 3839 3840 while self._match(TokenType.DOT): 3841 this = exp.Dot( 3842 this=this, 3843 expression=self._parse_function() or self._parse_id_var(any_token=False), 3844 ) 3845 3846 ordinality: t.Optional[bool] = None 3847 3848 if view: 3849 table = self._parse_id_var(any_token=False) 3850 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3851 table_alias: t.Optional[exp.TableAlias] = 
self.expression( 3852 exp.TableAlias, this=table, columns=columns 3853 ) 3854 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3855 # We move the alias from the lateral's child node to the lateral itself 3856 table_alias = this.args["alias"].pop() 3857 else: 3858 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3859 table_alias = self._parse_table_alias() 3860 3861 return self.expression( 3862 exp.Lateral, 3863 this=this, 3864 view=view, 3865 outer=outer, 3866 alias=table_alias, 3867 cross_apply=cross_apply, 3868 ordinality=ordinality, 3869 ) 3870 3871 def _parse_stream(self) -> t.Optional[exp.Stream]: 3872 index = self._index 3873 if self._match_text_seq("STREAM"): 3874 this = self._try_parse(self._parse_table) 3875 if this: 3876 return self.expression(exp.Stream, this=this) 3877 3878 self._retreat(index) 3879 return None 3880 3881 def _parse_join_parts( 3882 self, 3883 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3884 return ( 3885 self._match_set(self.JOIN_METHODS) and self._prev, 3886 self._match_set(self.JOIN_SIDES) and self._prev, 3887 self._match_set(self.JOIN_KINDS) and self._prev, 3888 ) 3889 3890 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3891 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3892 this = self._parse_column() 3893 if isinstance(this, exp.Column): 3894 return this.this 3895 return this 3896 3897 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3898 3899 def _parse_join( 3900 self, skip_join_token: bool = False, parse_bracket: bool = False 3901 ) -> t.Optional[exp.Join]: 3902 if self._match(TokenType.COMMA): 3903 table = self._try_parse(self._parse_table) 3904 cross_join = self.expression(exp.Join, this=table) if table else None 3905 3906 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3907 cross_join.set("kind", "CROSS") 3908 3909 return cross_join 3910 3911 index = self._index 3912 method, side, kind = self._parse_join_parts() 3913 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3914 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3915 join_comments = self._prev_comments 3916 3917 if not skip_join_token and not join: 3918 self._retreat(index) 3919 kind = None 3920 method = None 3921 side = None 3922 3923 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3924 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3925 3926 if not skip_join_token and not join and not outer_apply and not cross_apply: 3927 return None 3928 3929 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3930 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3931 kwargs["expressions"] = self._parse_csv( 3932 lambda: self._parse_table(parse_bracket=parse_bracket) 3933 ) 3934 3935 if method: 3936 kwargs["method"] = method.text.upper() 3937 if side: 3938 kwargs["side"] = side.text.upper() 3939 if kind: 3940 kwargs["kind"] = kind.text.upper() 3941 if hint: 3942 kwargs["hint"] = hint 3943 3944 if self._match(TokenType.MATCH_CONDITION): 3945 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3946 3947 if self._match(TokenType.ON): 3948 kwargs["on"] = self._parse_disjunction() 3949 elif self._match(TokenType.USING): 3950 kwargs["using"] = self._parse_using_identifiers() 3951 elif ( 3952 not method 3953 and not (outer_apply or cross_apply) 3954 and not isinstance(kwargs["this"], 
exp.Unnest) 3955 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3956 ): 3957 index = self._index 3958 joins: t.Optional[list] = list(self._parse_joins()) 3959 3960 if joins and self._match(TokenType.ON): 3961 kwargs["on"] = self._parse_disjunction() 3962 elif joins and self._match(TokenType.USING): 3963 kwargs["using"] = self._parse_using_identifiers() 3964 else: 3965 joins = None 3966 self._retreat(index) 3967 3968 kwargs["this"].set("joins", joins if joins else None) 3969 3970 kwargs["pivots"] = self._parse_pivots() 3971 3972 comments = [c for token in (method, side, kind) if token for c in token.comments] 3973 comments = (join_comments or []) + comments 3974 3975 if ( 3976 self.ADD_JOIN_ON_TRUE 3977 and not kwargs.get("on") 3978 and not kwargs.get("using") 3979 and not kwargs.get("method") 3980 and kwargs.get("kind") in (None, "INNER", "OUTER") 3981 ): 3982 kwargs["on"] = exp.true() 3983 3984 return self.expression(exp.Join, comments=comments, **kwargs) 3985 3986 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3987 this = self._parse_disjunction() 3988 3989 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3990 return this 3991 3992 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3993 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3994 3995 return this 3996 3997 def _parse_index_params(self) -> exp.IndexParameters: 3998 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3999 4000 if self._match(TokenType.L_PAREN, advance=False): 4001 columns = self._parse_wrapped_csv(self._parse_with_operator) 4002 else: 4003 columns = None 4004 4005 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4006 partition_by = self._parse_partition_by() 4007 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4008 tablespace = ( 4009 self._parse_var(any_token=True) 4010 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4011 else None 4012 ) 4013 where = self._parse_where() 4014 4015 on = self._parse_field() if self._match(TokenType.ON) else None 4016 4017 return self.expression( 4018 exp.IndexParameters, 4019 using=using, 4020 columns=columns, 4021 include=include, 4022 partition_by=partition_by, 4023 where=where, 4024 with_storage=with_storage, 4025 tablespace=tablespace, 4026 on=on, 4027 ) 4028 4029 def _parse_index( 4030 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 4031 ) -> t.Optional[exp.Index]: 4032 if index or anonymous: 4033 unique = None 4034 primary = None 4035 amp = None 4036 4037 self._match(TokenType.ON) 4038 self._match(TokenType.TABLE) # hive 4039 table = self._parse_table_parts(schema=True) 4040 else: 4041 unique = self._match(TokenType.UNIQUE) 4042 primary = self._match_text_seq("PRIMARY") 4043 amp = self._match_text_seq("AMP") 4044 4045 if not self._match(TokenType.INDEX): 4046 return None 4047 4048 index = self._parse_id_var() 4049 table = None 4050 4051 params = self._parse_index_params() 4052 4053 return self.expression( 4054 exp.Index, 4055 this=index, 4056 table=table, 4057 unique=unique, 4058 primary=primary, 4059 amp=amp, 4060 params=params, 4061 ) 4062 4063 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 4064 hints: t.List[exp.Expression] = [] 4065 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4066 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4067 hints.append( 4068 
self.expression( 4069 exp.WithTableHint, 4070 expressions=self._parse_csv( 4071 lambda: self._parse_function() or self._parse_var(any_token=True) 4072 ), 4073 ) 4074 ) 4075 self._match_r_paren() 4076 else: 4077 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4078 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4079 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4080 4081 self._match_set((TokenType.INDEX, TokenType.KEY)) 4082 if self._match(TokenType.FOR): 4083 hint.set("target", self._advance_any() and self._prev.text.upper()) 4084 4085 hint.set("expressions", self._parse_wrapped_id_vars()) 4086 hints.append(hint) 4087 4088 return hints or None 4089 4090 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 4091 return ( 4092 (not schema and self._parse_function(optional_parens=False)) 4093 or self._parse_id_var(any_token=False) 4094 or self._parse_string_as_identifier() 4095 or self._parse_placeholder() 4096 ) 4097 4098 def _parse_table_parts( 4099 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 4100 ) -> exp.Table: 4101 catalog = None 4102 db = None 4103 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 4104 4105 while self._match(TokenType.DOT): 4106 if catalog: 4107 # This allows nesting the table in arbitrarily many dot expressions if needed 4108 table = self.expression( 4109 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4110 ) 4111 else: 4112 catalog = db 4113 db = table 4114 # "" used for tsql FROM a..b case 4115 table = self._parse_table_part(schema=schema) or "" 4116 4117 if ( 4118 wildcard 4119 and self._is_connected() 4120 and (isinstance(table, exp.Identifier) or not table) 4121 and self._match(TokenType.STAR) 4122 ): 4123 if isinstance(table, exp.Identifier): 4124 table.args["this"] += "*" 4125 else: 4126 table = exp.Identifier(this="*") 4127 4128 # We bubble up comments from the Identifier to the Table 4129 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4130 4131 if is_db_reference: 4132 catalog = db 4133 db = table 4134 table = None 4135 4136 if not table and not is_db_reference: 4137 self.raise_error(f"Expected table name but got {self._curr}") 4138 if not db and is_db_reference: 4139 self.raise_error(f"Expected database name but got {self._curr}") 4140 4141 table = self.expression( 4142 exp.Table, 4143 comments=comments, 4144 this=table, 4145 db=db, 4146 catalog=catalog, 4147 ) 4148 4149 changes = self._parse_changes() 4150 if changes: 4151 table.set("changes", changes) 4152 4153 at_before = self._parse_historical_data() 4154 if at_before: 4155 table.set("when", at_before) 4156 4157 pivots = self._parse_pivots() 4158 if pivots: 4159 table.set("pivots", pivots) 4160 4161 return table 4162 4163 def _parse_table( 4164 self, 4165 schema: bool = False, 4166 joins: bool = False, 4167 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4168 parse_bracket: bool = False, 4169 is_db_reference: bool = False, 4170 parse_partition: bool = False, 4171 consume_pipe: bool = False, 4172 ) -> t.Optional[exp.Expression]: 4173 stream = self._parse_stream() 4174 if stream: 4175 return stream 4176 4177 lateral = self._parse_lateral() 4178 if lateral: 4179 return lateral 4180 4181 unnest = self._parse_unnest() 4182 if unnest: 4183 return unnest 4184 4185 values = self._parse_derived_table_values() 4186 if values: 4187 return values 4188 4189 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4190 if 
subquery: 4191 if not subquery.args.get("pivots"): 4192 subquery.set("pivots", self._parse_pivots()) 4193 return subquery 4194 4195 bracket = parse_bracket and self._parse_bracket(None) 4196 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4197 4198 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4199 self._parse_table 4200 ) 4201 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4202 4203 only = self._match(TokenType.ONLY) 4204 4205 this = t.cast( 4206 exp.Expression, 4207 bracket 4208 or rows_from 4209 or self._parse_bracket( 4210 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4211 ), 4212 ) 4213 4214 if only: 4215 this.set("only", only) 4216 4217 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4218 self._match_text_seq("*") 4219 4220 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4221 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4222 this.set("partition", self._parse_partition()) 4223 4224 if schema: 4225 return self._parse_schema(this=this) 4226 4227 version = self._parse_version() 4228 4229 if version: 4230 this.set("version", version) 4231 4232 if self.dialect.ALIAS_POST_TABLESAMPLE: 4233 this.set("sample", self._parse_table_sample()) 4234 4235 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4236 if alias: 4237 this.set("alias", alias) 4238 4239 if self._match(TokenType.INDEXED_BY): 4240 this.set("indexed", self._parse_table_parts()) 4241 elif self._match_text_seq("NOT", "INDEXED"): 4242 this.set("indexed", False) 4243 4244 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4245 return self.expression( 4246 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4247 ) 4248 4249 this.set("hints", self._parse_table_hints()) 4250 4251 if not this.args.get("pivots"): 4252 this.set("pivots", self._parse_pivots()) 4253 4254 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4255 this.set("sample", self._parse_table_sample()) 4256 4257 if joins: 4258 for join in self._parse_joins(): 4259 this.append("joins", join) 4260 4261 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4262 this.set("ordinality", True) 4263 this.set("alias", self._parse_table_alias()) 4264 4265 return this 4266 4267 def _parse_version(self) -> t.Optional[exp.Version]: 4268 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4269 this = "TIMESTAMP" 4270 elif self._match(TokenType.VERSION_SNAPSHOT): 4271 this = "VERSION" 4272 else: 4273 return None 4274 4275 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4276 kind = self._prev.text.upper() 4277 start = self._parse_bitwise() 4278 self._match_texts(("TO", "AND")) 4279 end = self._parse_bitwise() 4280 expression: t.Optional[exp.Expression] = self.expression( 4281 exp.Tuple, expressions=[start, end] 4282 ) 4283 elif self._match_text_seq("CONTAINED", "IN"): 4284 kind = "CONTAINED IN" 4285 expression = self.expression( 4286 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4287 ) 4288 elif self._match(TokenType.ALL): 4289 kind = "ALL" 4290 expression = None 4291 else: 4292 self._match_text_seq("AS", "OF") 4293 kind = "AS OF" 4294 expression = self._parse_type() 4295 4296 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4297 4298 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4299 # 
https://docs.snowflake.com/en/sql-reference/constructs/at-before 4300 index = self._index 4301 historical_data = None 4302 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4303 this = self._prev.text.upper() 4304 kind = ( 4305 self._match(TokenType.L_PAREN) 4306 and self._match_texts(self.HISTORICAL_DATA_KIND) 4307 and self._prev.text.upper() 4308 ) 4309 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4310 4311 if expression: 4312 self._match_r_paren() 4313 historical_data = self.expression( 4314 exp.HistoricalData, this=this, kind=kind, expression=expression 4315 ) 4316 else: 4317 self._retreat(index) 4318 4319 return historical_data 4320 4321 def _parse_changes(self) -> t.Optional[exp.Changes]: 4322 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4323 return None 4324 4325 information = self._parse_var(any_token=True) 4326 self._match_r_paren() 4327 4328 return self.expression( 4329 exp.Changes, 4330 information=information, 4331 at_before=self._parse_historical_data(), 4332 end=self._parse_historical_data(), 4333 ) 4334 4335 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4336 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4337 return None 4338 4339 self._advance() 4340 4341 expressions = self._parse_wrapped_csv(self._parse_equality) 4342 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4343 4344 alias = self._parse_table_alias() if with_alias else None 4345 4346 if alias: 4347 if self.dialect.UNNEST_COLUMN_ONLY: 4348 if alias.args.get("columns"): 4349 self.raise_error("Unexpected extra column alias in unnest.") 4350 4351 alias.set("columns", [alias.this]) 4352 alias.set("this", None) 4353 4354 columns = alias.args.get("columns") or [] 4355 if offset and len(expressions) < len(columns): 4356 offset = columns.pop() 4357 4358 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4359 self._match(TokenType.ALIAS) 4360 offset = self._parse_id_var( 4361 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4362 ) or exp.to_identifier("offset") 4363 4364 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4365 4366 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4367 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4368 if not is_derived and not ( 4369 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4370 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4371 ): 4372 return None 4373 4374 expressions = self._parse_csv(self._parse_value) 4375 alias = self._parse_table_alias() 4376 4377 if is_derived: 4378 self._match_r_paren() 4379 4380 return self.expression( 4381 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4382 ) 4383 4384 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4385 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4386 as_modifier and self._match_text_seq("USING", "SAMPLE") 4387 ): 4388 return None 4389 4390 bucket_numerator = None 4391 bucket_denominator = None 4392 bucket_field = None 4393 percent = None 4394 size = None 4395 seed = None 4396 4397 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4398 matched_l_paren = self._match(TokenType.L_PAREN) 4399 4400 if self.TABLESAMPLE_CSV: 4401 num = None 4402 expressions = self._parse_csv(self._parse_primary) 4403 else: 4404 expressions = None 4405 num = ( 4406 self._parse_factor() 4407 if 
self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )
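
    # A hedged sketch of the DuckDB-style simplified PIVOT parsed below, assuming
    # the duckdb dialect (exact AST shapes may differ across sqlglot versions):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    #   # -> an exp.Pivot with this=cities, expressions=[year], using=[SUM(population)]

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ...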
ON (col1, col2, col3) AS row_val 4475 return self._parse_alias(this) 4476 4477 return this 4478 4479 this = self._parse_table() 4480 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4481 into = self._parse_unpivot_columns() 4482 using = self._match(TokenType.USING) and self._parse_csv( 4483 lambda: self._parse_alias(self._parse_column()) 4484 ) 4485 group = self._parse_group() 4486 4487 return self.expression( 4488 exp.Pivot, 4489 this=this, 4490 expressions=expressions, 4491 using=using, 4492 group=group, 4493 unpivot=is_unpivot, 4494 into=into, 4495 ) 4496 4497 def _parse_pivot_in(self) -> exp.In: 4498 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4499 this = self._parse_select_or_expression() 4500 4501 self._match(TokenType.ALIAS) 4502 alias = self._parse_bitwise() 4503 if alias: 4504 if isinstance(alias, exp.Column) and not alias.db: 4505 alias = alias.this 4506 return self.expression(exp.PivotAlias, this=this, alias=alias) 4507 4508 return this 4509 4510 value = self._parse_column() 4511 4512 if not self._match(TokenType.IN): 4513 self.raise_error("Expecting IN") 4514 4515 if self._match(TokenType.L_PAREN): 4516 if self._match(TokenType.ANY): 4517 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4518 else: 4519 exprs = self._parse_csv(_parse_aliased_expression) 4520 self._match_r_paren() 4521 return self.expression(exp.In, this=value, expressions=exprs) 4522 4523 return self.expression(exp.In, this=value, field=self._parse_id_var()) 4524 4525 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4526 func = self._parse_function() 4527 if not func: 4528 if self._prev and self._prev.token_type == TokenType.COMMA: 4529 return None 4530 self.raise_error("Expecting an aggregation function in PIVOT") 4531 4532 return self._parse_alias(func) 4533 4534 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4535 index = self._index 4536 include_nulls = None 4537 4538 if self._match(TokenType.PIVOT): 4539 unpivot = False 4540 elif self._match(TokenType.UNPIVOT): 4541 unpivot = True 4542 4543 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4544 if self._match_text_seq("INCLUDE", "NULLS"): 4545 include_nulls = True 4546 elif self._match_text_seq("EXCLUDE", "NULLS"): 4547 include_nulls = False 4548 else: 4549 return None 4550 4551 expressions = [] 4552 4553 if not self._match(TokenType.L_PAREN): 4554 self._retreat(index) 4555 return None 4556 4557 if unpivot: 4558 expressions = self._parse_csv(self._parse_column) 4559 else: 4560 expressions = self._parse_csv(self._parse_pivot_aggregation) 4561 4562 if not expressions: 4563 self.raise_error("Failed to parse PIVOT's aggregation list") 4564 4565 if not self._match(TokenType.FOR): 4566 self.raise_error("Expecting FOR") 4567 4568 fields = [] 4569 while True: 4570 field = self._try_parse(self._parse_pivot_in) 4571 if not field: 4572 break 4573 fields.append(field) 4574 4575 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4576 self._parse_bitwise 4577 ) 4578 4579 group = self._parse_group() 4580 4581 self._match_r_paren() 4582 4583 pivot = self.expression( 4584 exp.Pivot, 4585 expressions=expressions, 4586 fields=fields, 4587 unpivot=unpivot, 4588 include_nulls=include_nulls, 4589 default_on_null=default_on_null, 4590 group=group, 4591 ) 4592 4593 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4594 pivot.set("alias", self._parse_table_alias()) 4595 4596 if 
not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't
                # infer the column names in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_disjunction()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
                key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
                elements[key].append(cube_or_rollup)
            elif grouping_sets := self._parse_grouping_sets():
                elements["grouping_sets"].append(grouping_sets)
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break
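
        # e.g. "GROUP BY a, ROLLUP (b, c)" collects a into elements["expressions"] and
        # the Rollup node into elements["rollup"]; "GROUP BY ALL" instead just sets
        # elements["all"] = True (an illustrative sketch of the collected args)
        return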
self.expression(exp.Group, comments=comments, **elements) # type: ignore 4699 4700 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4701 if self._match(TokenType.CUBE): 4702 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4703 elif self._match(TokenType.ROLLUP): 4704 kind = exp.Rollup 4705 else: 4706 return None 4707 4708 return self.expression( 4709 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise) 4710 ) 4711 4712 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4713 if self._match(TokenType.GROUPING_SETS): 4714 return self.expression( 4715 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4716 ) 4717 return None 4718 4719 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4720 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4721 4722 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4723 if not skip_having_token and not self._match(TokenType.HAVING): 4724 return None 4725 return self.expression( 4726 exp.Having, comments=self._prev_comments, this=self._parse_disjunction() 4727 ) 4728 4729 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4730 if not self._match(TokenType.QUALIFY): 4731 return None 4732 return self.expression(exp.Qualify, this=self._parse_disjunction()) 4733 4734 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4735 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4736 exp.Prior, this=self._parse_bitwise() 4737 ) 4738 connect = self._parse_disjunction() 4739 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4740 return connect 4741 4742 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4743 if skip_start_token: 4744 start = None 4745 elif self._match(TokenType.START_WITH): 4746 start = self._parse_disjunction() 4747 else: 4748 return None 4749 4750 self._match(TokenType.CONNECT_BY) 4751 nocycle = self._match_text_seq("NOCYCLE") 4752 connect = self._parse_connect_with_prior() 4753 4754 if not start and self._match(TokenType.START_WITH): 4755 start = self._parse_disjunction() 4756 4757 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4758 4759 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4760 this = self._parse_id_var(any_token=True) 4761 if self._match(TokenType.ALIAS): 4762 this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction()) 4763 return this 4764 4765 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4766 if self._match_text_seq("INTERPOLATE"): 4767 return self._parse_wrapped_csv(self._parse_name_as_expression) 4768 return None 4769 4770 def _parse_order( 4771 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4772 ) -> t.Optional[exp.Expression]: 4773 siblings = None 4774 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4775 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4776 return this 4777 4778 siblings = True 4779 4780 return self.expression( 4781 exp.Order, 4782 comments=self._prev_comments, 4783 this=this, 4784 expressions=self._parse_csv(self._parse_ordered), 4785 siblings=siblings, 4786 ) 4787 4788 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4789 if not self._match(token): 4790 return None 4791 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4792 4793 def 
_parse_ordered( 4794 self, parse_method: t.Optional[t.Callable] = None 4795 ) -> t.Optional[exp.Ordered]: 4796 this = parse_method() if parse_method else self._parse_disjunction() 4797 if not this: 4798 return None 4799 4800 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4801 this = exp.var("ALL") 4802 4803 asc = self._match(TokenType.ASC) 4804 desc = self._match(TokenType.DESC) or (asc and False) 4805 4806 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4807 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4808 4809 nulls_first = is_nulls_first or False 4810 explicitly_null_ordered = is_nulls_first or is_nulls_last 4811 4812 if ( 4813 not explicitly_null_ordered 4814 and ( 4815 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4816 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4817 ) 4818 and self.dialect.NULL_ORDERING != "nulls_are_last" 4819 ): 4820 nulls_first = True 4821 4822 if self._match_text_seq("WITH", "FILL"): 4823 with_fill = self.expression( 4824 exp.WithFill, 4825 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 4826 to=self._match_text_seq("TO") and self._parse_bitwise(), 4827 step=self._match_text_seq("STEP") and self._parse_bitwise(), 4828 interpolate=self._parse_interpolate(), 4829 ) 4830 else: 4831 with_fill = None 4832 4833 return self.expression( 4834 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4835 ) 4836 4837 def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]: 4838 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 4839 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4840 self._match_text_seq("ONLY") 4841 with_ties = self._match_text_seq("WITH", "TIES") 4842 4843 if not (percent or rows or with_ties): 4844 return None 4845 4846 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4847 4848 def _parse_limit( 4849 self, 4850 this: t.Optional[exp.Expression] = None, 4851 top: bool = False, 4852 skip_limit_token: bool = False, 4853 ) -> t.Optional[exp.Expression]: 4854 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4855 comments = self._prev_comments 4856 if top: 4857 limit_paren = self._match(TokenType.L_PAREN) 4858 expression = self._parse_term() if limit_paren else self._parse_number() 4859 4860 if limit_paren: 4861 self._match_r_paren() 4862 4863 else: 4864 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 4865 # we try to build an exp.Mod expr. 
For that matter, we backtrack and instead 4866 # consume the factor plus parse the percentage separately 4867 index = self._index 4868 expression = self._try_parse(self._parse_term) 4869 if isinstance(expression, exp.Mod): 4870 self._retreat(index) 4871 expression = self._parse_factor() 4872 elif not expression: 4873 expression = self._parse_factor() 4874 limit_options = self._parse_limit_options() 4875 4876 if self._match(TokenType.COMMA): 4877 offset = expression 4878 expression = self._parse_term() 4879 else: 4880 offset = None 4881 4882 limit_exp = self.expression( 4883 exp.Limit, 4884 this=this, 4885 expression=expression, 4886 offset=offset, 4887 comments=comments, 4888 limit_options=limit_options, 4889 expressions=self._parse_limit_by(), 4890 ) 4891 4892 return limit_exp 4893 4894 if self._match(TokenType.FETCH): 4895 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4896 direction = self._prev.text.upper() if direction else "FIRST" 4897 4898 count = self._parse_field(tokens=self.FETCH_TOKENS) 4899 4900 return self.expression( 4901 exp.Fetch, 4902 direction=direction, 4903 count=count, 4904 limit_options=self._parse_limit_options(), 4905 ) 4906 4907 return this 4908 4909 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4910 if not self._match(TokenType.OFFSET): 4911 return this 4912 4913 count = self._parse_term() 4914 self._match_set((TokenType.ROW, TokenType.ROWS)) 4915 4916 return self.expression( 4917 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4918 ) 4919 4920 def _can_parse_limit_or_offset(self) -> bool: 4921 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4922 return False 4923 4924 index = self._index 4925 result = bool( 4926 self._try_parse(self._parse_limit, retreat=True) 4927 or self._try_parse(self._parse_offset, retreat=True) 4928 ) 4929 self._retreat(index) 4930 return result 4931 4932 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4933 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4934 4935 def _parse_locks(self) -> t.List[exp.Lock]: 4936 locks = [] 4937 while True: 4938 update, key = None, None 4939 if self._match_text_seq("FOR", "UPDATE"): 4940 update = True 4941 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4942 "LOCK", "IN", "SHARE", "MODE" 4943 ): 4944 update = False 4945 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4946 update, key = False, True 4947 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4948 update, key = True, True 4949 else: 4950 break 4951 4952 expressions = None 4953 if self._match_text_seq("OF"): 4954 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4955 4956 wait: t.Optional[bool | exp.Expression] = None 4957 if self._match_text_seq("NOWAIT"): 4958 wait = True 4959 elif self._match_text_seq("WAIT"): 4960 wait = self._parse_primary() 4961 elif self._match_text_seq("SKIP", "LOCKED"): 4962 wait = False 4963 4964 locks.append( 4965 self.expression( 4966 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4967 ) 4968 ) 4969 4970 return locks 4971 4972 def parse_set_operation( 4973 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4974 ) -> t.Optional[exp.Expression]: 4975 start = self._index 4976 _, side_token, kind_token = self._parse_join_parts() 4977 4978 side = side_token.text if side_token else None 4979 kind = kind_token.text if kind_token else None 4980 4981 if not 
self._match_set(self.SET_OPERATIONS): 4982 self._retreat(start) 4983 return None 4984 4985 token_type = self._prev.token_type 4986 4987 if token_type == TokenType.UNION: 4988 operation: t.Type[exp.SetOperation] = exp.Union 4989 elif token_type == TokenType.EXCEPT: 4990 operation = exp.Except 4991 else: 4992 operation = exp.Intersect 4993 4994 comments = self._prev.comments 4995 4996 if self._match(TokenType.DISTINCT): 4997 distinct: t.Optional[bool] = True 4998 elif self._match(TokenType.ALL): 4999 distinct = False 5000 else: 5001 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5002 if distinct is None: 5003 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5004 5005 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 5006 "STRICT", "CORRESPONDING" 5007 ) 5008 if self._match_text_seq("CORRESPONDING"): 5009 by_name = True 5010 if not side and not kind: 5011 kind = "INNER" 5012 5013 on_column_list = None 5014 if by_name and self._match_texts(("ON", "BY")): 5015 on_column_list = self._parse_wrapped_csv(self._parse_column) 5016 5017 expression = self._parse_select( 5018 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5019 ) 5020 5021 return self.expression( 5022 operation, 5023 comments=comments, 5024 this=this, 5025 distinct=distinct, 5026 by_name=by_name, 5027 expression=expression, 5028 side=side, 5029 kind=kind, 5030 on=on_column_list, 5031 ) 5032 5033 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5034 while this: 5035 setop = self.parse_set_operation(this) 5036 if not setop: 5037 break 5038 this = setop 5039 5040 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5041 expression = this.expression 5042 5043 if expression: 5044 for arg in self.SET_OP_MODIFIERS: 5045 expr = expression.args.get(arg) 5046 if expr: 5047 this.set(arg, expr.pop()) 5048 5049 return this 5050 5051 def _parse_expression(self) -> t.Optional[exp.Expression]: 5052 return self._parse_alias(self._parse_assignment()) 5053 5054 def _parse_assignment(self) -> t.Optional[exp.Expression]: 5055 this = self._parse_disjunction() 5056 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 5057 # This allows us to parse <non-identifier token> := <expr> 5058 this = exp.column( 5059 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5060 ) 5061 5062 while self._match_set(self.ASSIGNMENT): 5063 if isinstance(this, exp.Column) and len(this.parts) == 1: 5064 this = this.this 5065 5066 this = self.expression( 5067 self.ASSIGNMENT[self._prev.token_type], 5068 this=this, 5069 comments=self._prev_comments, 5070 expression=self._parse_assignment(), 5071 ) 5072 5073 return this 5074 5075 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 5076 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 5077 5078 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 5079 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 5080 5081 def _parse_equality(self) -> t.Optional[exp.Expression]: 5082 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 5083 5084 def _parse_comparison(self) -> t.Optional[exp.Expression]: 5085 return self._parse_tokens(self._parse_range, self.COMPARISON) 5086 5087 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5088 this = this or self._parse_bitwise() 5089 negate = self._match(TokenType.NOT) 5090 5091 if 
self._match_set(self.RANGE_PARSERS): 5092 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5093 if not expression: 5094 return this 5095 5096 this = expression 5097 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5098 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5099 5100 # Postgres supports ISNULL and NOTNULL for conditions. 5101 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5102 if self._match(TokenType.NOTNULL): 5103 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5104 this = self.expression(exp.Not, this=this) 5105 5106 if negate: 5107 this = self._negate_range(this) 5108 5109 if self._match(TokenType.IS): 5110 this = self._parse_is(this) 5111 5112 return this 5113 5114 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5115 if not this: 5116 return this 5117 5118 return self.expression(exp.Not, this=this) 5119 5120 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5121 index = self._index - 1 5122 negate = self._match(TokenType.NOT) 5123 5124 if self._match_text_seq("DISTINCT", "FROM"): 5125 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5126 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5127 5128 if self._match(TokenType.JSON): 5129 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5130 5131 if self._match_text_seq("WITH"): 5132 _with = True 5133 elif self._match_text_seq("WITHOUT"): 5134 _with = False 5135 else: 5136 _with = None 5137 5138 unique = self._match(TokenType.UNIQUE) 5139 self._match_text_seq("KEYS") 5140 expression: t.Optional[exp.Expression] = self.expression( 5141 exp.JSON, 5142 this=kind, 5143 with_=_with, 5144 unique=unique, 5145 ) 5146 else: 5147 expression = self._parse_null() or self._parse_bitwise() 5148 if not expression: 5149 self._retreat(index) 5150 return None 5151 5152 this = self.expression(exp.Is, this=this, expression=expression) 5153 this = self.expression(exp.Not, this=this) if negate else this 5154 return self._parse_column_ops(this) 5155 5156 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5157 unnest = self._parse_unnest(with_alias=False) 5158 if unnest: 5159 this = self.expression(exp.In, this=this, unnest=unnest) 5160 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5161 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5162 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5163 5164 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5165 this = self.expression( 5166 exp.In, 5167 this=this, 5168 query=self._parse_query_modifiers(query).subquery(copy=False), 5169 ) 5170 else: 5171 this = self.expression(exp.In, this=this, expressions=expressions) 5172 5173 if matched_l_paren: 5174 self._match_r_paren(this) 5175 elif not self._match(TokenType.R_BRACKET, expression=this): 5176 self.raise_error("Expecting ]") 5177 else: 5178 this = self.expression(exp.In, this=this, field=self._parse_column()) 5179 5180 return this 5181 5182 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5183 symmetric = None 5184 if self._match_text_seq("SYMMETRIC"): 5185 symmetric = True 5186 elif self._match_text_seq("ASYMMETRIC"): 5187 symmetric = False 5188 5189 low = self._parse_bitwise() 5190 self._match(TokenType.AND) 5191 high = self._parse_bitwise() 5192 5193 return 
self.expression( 5194 exp.Between, 5195 this=this, 5196 low=low, 5197 high=high, 5198 symmetric=symmetric, 5199 ) 5200 5201 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5202 if not self._match(TokenType.ESCAPE): 5203 return this 5204 return self.expression( 5205 exp.Escape, this=this, expression=self._parse_string() or self._parse_null() 5206 ) 5207 5208 def _parse_interval_span(self, this: exp.Expression) -> exp.Interval: 5209 # handle day-time format interval span with omitted units: 5210 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5211 interval_span_units_omitted = None 5212 if ( 5213 this 5214 and this.is_string 5215 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5216 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5217 ): 5218 index = self._index 5219 5220 # Var "TO" Var 5221 first_unit = self._parse_var(any_token=True, upper=True) 5222 second_unit = None 5223 if first_unit and self._match_text_seq("TO"): 5224 second_unit = self._parse_var(any_token=True, upper=True) 5225 5226 interval_span_units_omitted = not (first_unit and second_unit) 5227 5228 self._retreat(index) 5229 5230 unit = ( 5231 None 5232 if interval_span_units_omitted 5233 else ( 5234 self._parse_function() 5235 or ( 5236 not self._match(TokenType.ALIAS, advance=False) 5237 and self._parse_var(any_token=True, upper=True) 5238 ) 5239 ) 5240 ) 5241 5242 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5243 # each INTERVAL expression into this canonical form so it's easy to transpile 5244 if this and this.is_number: 5245 this = exp.Literal.string(this.to_py()) 5246 elif this and this.is_string: 5247 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5248 if parts and unit: 5249 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5250 unit = None 5251 self._retreat(self._index - 1) 5252 5253 if len(parts) == 1: 5254 this = exp.Literal.string(parts[0][0]) 5255 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5256 5257 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5258 unit = self.expression( 5259 exp.IntervalSpan, 5260 this=unit, 5261 expression=self._parse_function() or self._parse_var(any_token=True, upper=True), 5262 ) 5263 5264 return self.expression(exp.Interval, this=this, unit=unit) 5265 5266 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5267 index = self._index 5268 5269 if not self._match(TokenType.INTERVAL) and match_interval: 5270 return None 5271 5272 if self._match(TokenType.STRING, advance=False): 5273 this = self._parse_primary() 5274 else: 5275 this = self._parse_term() 5276 5277 if not this or ( 5278 isinstance(this, exp.Column) 5279 and not this.table 5280 and not this.this.quoted 5281 and self._curr 5282 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 5283 ): 5284 self._retreat(index) 5285 return None 5286 5287 interval = self._parse_interval_span(this) 5288 5289 index = self._index 5290 self._match(TokenType.PLUS) 5291 5292 # Convert INTERVAL 'val_1' unit_1 [+] ... 
[+] 'val_n' unit_n into a sum of intervals 5293 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5294 return self.expression( 5295 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5296 ) 5297 5298 self._retreat(index) 5299 return interval 5300 5301 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5302 this = self._parse_term() 5303 5304 while True: 5305 if self._match_set(self.BITWISE): 5306 this = self.expression( 5307 self.BITWISE[self._prev.token_type], 5308 this=this, 5309 expression=self._parse_term(), 5310 ) 5311 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5312 this = self.expression( 5313 exp.DPipe, 5314 this=this, 5315 expression=self._parse_term(), 5316 safe=not self.dialect.STRICT_STRING_CONCAT, 5317 ) 5318 elif self._match(TokenType.DQMARK): 5319 this = self.expression( 5320 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5321 ) 5322 elif self._match_pair(TokenType.LT, TokenType.LT): 5323 this = self.expression( 5324 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5325 ) 5326 elif self._match_pair(TokenType.GT, TokenType.GT): 5327 this = self.expression( 5328 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5329 ) 5330 else: 5331 break 5332 5333 return this 5334 5335 def _parse_term(self) -> t.Optional[exp.Expression]: 5336 this = self._parse_factor() 5337 5338 while self._match_set(self.TERM): 5339 klass = self.TERM[self._prev.token_type] 5340 comments = self._prev_comments 5341 expression = self._parse_factor() 5342 5343 this = self.expression(klass, this=this, comments=comments, expression=expression) 5344 5345 if isinstance(this, exp.Collate): 5346 expr = this.expression 5347 5348 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5349 # fallback to Identifier / Var 5350 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5351 ident = expr.this 5352 if isinstance(ident, exp.Identifier): 5353 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5354 5355 return this 5356 5357 def _parse_factor(self) -> t.Optional[exp.Expression]: 5358 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5359 this = self._parse_at_time_zone(parse_method()) 5360 5361 while self._match_set(self.FACTOR): 5362 klass = self.FACTOR[self._prev.token_type] 5363 comments = self._prev_comments 5364 expression = parse_method() 5365 5366 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5367 self._retreat(self._index - 1) 5368 return this 5369 5370 this = self.expression(klass, this=this, comments=comments, expression=expression) 5371 5372 if isinstance(this, exp.Div): 5373 this.set("typed", self.dialect.TYPED_DIVISION) 5374 this.set("safe", self.dialect.SAFE_DIVISION) 5375 5376 return this 5377 5378 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5379 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5380 5381 def _parse_unary(self) -> t.Optional[exp.Expression]: 5382 if self._match_set(self.UNARY_PARSERS): 5383 return self.UNARY_PARSERS[self._prev.token_type](self) 5384 return self._parse_type() 5385 5386 def _parse_type( 5387 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5388 ) -> t.Optional[exp.Expression]: 5389 interval = parse_interval and self._parse_interval() 5390 if interval: 5391 return self._parse_column_ops(interval) 5392 5393 index = self._index 5394 data_type = self._parse_types(check_func=True, 
allow_identifiers=False) 5395 5396 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5397 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5398 if isinstance(data_type, exp.Cast): 5399 # This constructor can contain ops directly after it, for instance struct unnesting: 5400 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5401 return self._parse_column_ops(data_type) 5402 5403 if data_type: 5404 index2 = self._index 5405 this = self._parse_primary() 5406 5407 if isinstance(this, exp.Literal): 5408 literal = this.name 5409 this = self._parse_column_ops(this) 5410 5411 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5412 if parser: 5413 return parser(self, this, data_type) 5414 5415 if ( 5416 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5417 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5418 and TIME_ZONE_RE.search(literal) 5419 ): 5420 data_type = exp.DataType.build("TIMESTAMPTZ") 5421 5422 return self.expression(exp.Cast, this=this, to=data_type) 5423 5424 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5425 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5426 # 5427 # If the index difference here is greater than 1, that means the parser itself must have 5428 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5429 # 5430 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5431 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5432 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5433 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5434 # 5435 # In these cases, we don't really want to return the converted type, but instead retreat 5436 # and try to parse a Column or Identifier in the section below.
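# Worked example of the above (restating the two cases): if the SQL itself contains
# DECIMAL(38, 0), the parser consumes the tokens ( 38 , 0 ) in addition to the type keyword,
# so index2 - index > 1 and the parsed DataType is kept. If instead a bare DECIMAL was only
# expanded to DECIMAL(38, 0) by a TYPE_CONVERTERS callable (the Snowflake example), just the
# keyword was consumed, so we fall through, retreat, and re-parse it as a Column or Identifier.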
5437 if data_type.expressions and index2 - index > 1: 5438 self._retreat(index2) 5439 return self._parse_column_ops(data_type) 5440 5441 self._retreat(index) 5442 5443 if fallback_to_identifier: 5444 return self._parse_id_var() 5445 5446 this = self._parse_column() 5447 return this and self._parse_column_ops(this) 5448 5449 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5450 this = self._parse_type() 5451 if not this: 5452 return None 5453 5454 if isinstance(this, exp.Column) and not this.table: 5455 this = exp.var(this.name.upper()) 5456 5457 return self.expression( 5458 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5459 ) 5460 5461 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5462 type_name = identifier.name 5463 5464 while self._match(TokenType.DOT): 5465 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5466 5467 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5468 5469 def _parse_types( 5470 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5471 ) -> t.Optional[exp.Expression]: 5472 index = self._index 5473 5474 this: t.Optional[exp.Expression] = None 5475 prefix = self._match_text_seq("SYSUDTLIB", ".") 5476 5477 if self._match_set(self.TYPE_TOKENS): 5478 type_token = self._prev.token_type 5479 else: 5480 type_token = None 5481 identifier = allow_identifiers and self._parse_id_var( 5482 any_token=False, tokens=(TokenType.VAR,) 5483 ) 5484 if isinstance(identifier, exp.Identifier): 5485 try: 5486 tokens = self.dialect.tokenize(identifier.name) 5487 except TokenError: 5488 tokens = None 5489 5490 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5491 type_token = tokens[0].token_type 5492 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5493 this = self._parse_user_defined_type(identifier) 5494 else: 5495 self._retreat(self._index - 1) 5496 return None 5497 else: 5498 return None 5499 5500 if type_token == TokenType.PSEUDO_TYPE: 5501 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5502 5503 if type_token == TokenType.OBJECT_IDENTIFIER: 5504 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5505 5506 # https://materialize.com/docs/sql/types/map/ 5507 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5508 key_type = self._parse_types( 5509 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5510 ) 5511 if not self._match(TokenType.FARROW): 5512 self._retreat(index) 5513 return None 5514 5515 value_type = self._parse_types( 5516 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5517 ) 5518 if not self._match(TokenType.R_BRACKET): 5519 self._retreat(index) 5520 return None 5521 5522 return exp.DataType( 5523 this=exp.DataType.Type.MAP, 5524 expressions=[key_type, value_type], 5525 nested=True, 5526 prefix=prefix, 5527 ) 5528 5529 nested = type_token in self.NESTED_TYPE_TOKENS 5530 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5531 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5532 expressions = None 5533 maybe_func = False 5534 5535 if self._match(TokenType.L_PAREN): 5536 if is_struct: 5537 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5538 elif nested: 5539 expressions = self._parse_csv( 5540 lambda: self._parse_types( 5541 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5542 ) 5543 ) 5544 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5545 this = expressions[0] 5546 this.set("nullable", True) 5547 self._match_r_paren() 5548 return this 5549 elif type_token in self.ENUM_TYPE_TOKENS: 5550 expressions = self._parse_csv(self._parse_equality) 5551 elif is_aggregate: 5552 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5553 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5554 ) 5555 if not func_or_ident: 5556 return None 5557 expressions = [func_or_ident] 5558 if self._match(TokenType.COMMA): 5559 expressions.extend( 5560 self._parse_csv( 5561 lambda: self._parse_types( 5562 check_func=check_func, 5563 schema=schema, 5564 allow_identifiers=allow_identifiers, 5565 ) 5566 ) 5567 ) 5568 else: 5569 expressions = self._parse_csv(self._parse_type_size) 5570 5571 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5572 if type_token == TokenType.VECTOR and len(expressions) == 2: 5573 expressions = self._parse_vector_expressions(expressions) 5574 5575 if not self._match(TokenType.R_PAREN): 5576 self._retreat(index) 5577 return None 5578 5579 maybe_func = True 5580 5581 values: t.Optional[t.List[exp.Expression]] = None 5582 5583 if nested and self._match(TokenType.LT): 5584 if is_struct: 5585 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5586 else: 5587 expressions = self._parse_csv( 5588 lambda: self._parse_types( 5589 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5590 ) 5591 ) 5592 5593 if not self._match(TokenType.GT): 5594 self.raise_error("Expecting >") 5595 5596 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5597 values = self._parse_csv(self._parse_disjunction) 5598 if not values and is_struct: 5599 values = None 5600 self._retreat(self._index - 1) 5601 else: 5602 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5603 5604 if type_token in self.TIMESTAMPS: 5605 if self._match_text_seq("WITH", "TIME", "ZONE"): 5606 maybe_func = False 5607 tz_type = ( 5608 exp.DataType.Type.TIMETZ 5609 if type_token in self.TIMES 5610 else exp.DataType.Type.TIMESTAMPTZ 5611 ) 5612 this = exp.DataType(this=tz_type, expressions=expressions) 5613 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5614 maybe_func = False 5615 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5616 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5617 maybe_func = False 5618 elif type_token == TokenType.INTERVAL: 5619 if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 5620 unit = self._parse_var(upper=True) 5621 if self._match_text_seq("TO"): 5622 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5623 5624 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5625 else: 5626 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5627 elif type_token == TokenType.VOID: 5628 this = exp.DataType(this=exp.DataType.Type.NULL) 5629 5630 if maybe_func and check_func: 5631 index2 = self._index 5632 peek = self._parse_string() 5633 5634 if not peek: 5635 self._retreat(index) 5636 return None 5637 5638 self._retreat(index2) 5639 5640 if not this: 5641 if self._match_text_seq("UNSIGNED"): 5642 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5643 if not unsigned_type_token: 5644 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5645 5646 type_token = unsigned_type_token or type_token 5647 5648 # NULLABLE without 
parentheses can be a column (Presto/Trino) 5649 if type_token == TokenType.NULLABLE and not expressions: 5650 self._retreat(index) 5651 return None 5652 5653 this = exp.DataType( 5654 this=exp.DataType.Type[type_token.value], 5655 expressions=expressions, 5656 nested=nested, 5657 prefix=prefix, 5658 ) 5659 5660 # Empty arrays/structs are allowed 5661 if values is not None: 5662 cls = exp.Struct if is_struct else exp.Array 5663 this = exp.cast(cls(expressions=values), this, copy=False) 5664 5665 elif expressions: 5666 this.set("expressions", expressions) 5667 5668 # https://materialize.com/docs/sql/types/list/#type-name 5669 while self._match(TokenType.LIST): 5670 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5671 5672 index = self._index 5673 5674 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5675 matched_array = self._match(TokenType.ARRAY) 5676 5677 while self._curr: 5678 datatype_token = self._prev.token_type 5679 matched_l_bracket = self._match(TokenType.L_BRACKET) 5680 5681 if (not matched_l_bracket and not matched_array) or ( 5682 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5683 ): 5684 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5685 # not to be confused with the fixed size array parsing 5686 break 5687 5688 matched_array = False 5689 values = self._parse_csv(self._parse_disjunction) or None 5690 if ( 5691 values 5692 and not schema 5693 and ( 5694 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 5695 or datatype_token == TokenType.ARRAY 5696 or not self._match(TokenType.R_BRACKET, advance=False) 5697 ) 5698 ): 5699 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5700 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5701 self._retreat(index) 5702 break 5703 5704 this = exp.DataType( 5705 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5706 ) 5707 self._match(TokenType.R_BRACKET) 5708 5709 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5710 converter = self.TYPE_CONVERTERS.get(this.this) 5711 if converter: 5712 this = converter(t.cast(exp.DataType, this)) 5713 5714 return this 5715 5716 def _parse_vector_expressions( 5717 self, expressions: t.List[exp.Expression] 5718 ) -> t.List[exp.Expression]: 5719 return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]] 5720 5721 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5722 index = self._index 5723 5724 if ( 5725 self._curr 5726 and self._next 5727 and self._curr.token_type in self.TYPE_TOKENS 5728 and self._next.token_type in self.TYPE_TOKENS 5729 ): 5730 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5731 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5732 this = self._parse_id_var() 5733 else: 5734 this = ( 5735 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5736 or self._parse_id_var() 5737 ) 5738 5739 self._match(TokenType.COLON) 5740 5741 if ( 5742 type_required 5743 and not isinstance(this, exp.DataType) 5744 and not self._match_set(self.TYPE_TOKENS, advance=False) 5745 ): 5746 self._retreat(index) 5747 return self._parse_types() 5748 5749 return self._parse_column_def(this) 5750 5751 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5752 if not self._match_text_seq("AT", "TIME", "ZONE"): 5753 return this 5754 return self._parse_at_time_zone( 5755 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5756 ) 5757 5758 def _parse_column(self) -> t.Optional[exp.Expression]: 5759 this = self._parse_column_reference() 5760 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5761 5762 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5763 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5764 5765 return column 5766 5767 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5768 this = self._parse_field() 5769 if ( 5770 not this 5771 and self._match(TokenType.VALUES, advance=False) 5772 and self.VALUES_FOLLOWED_BY_PAREN 5773 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5774 ): 5775 this = self._parse_id_var() 5776 5777 if isinstance(this, exp.Identifier): 5778 # We bubble up comments from the Identifier to the Column 5779 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5780 5781 return this 5782 5783 def _parse_colon_as_variant_extract( 5784 self, this: t.Optional[exp.Expression] 5785 ) -> t.Optional[exp.Expression]: 5786 casts = [] 5787 json_path = [] 5788 escape = None 5789 5790 while self._match(TokenType.COLON): 5791 start_index = self._index 5792 5793 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5794 path = self._parse_column_ops( 5795 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5796 ) 5797 5798 # The cast :: operator has a lower precedence than the extraction operator :, so 5799 # we rearrange the AST appropriately to avoid casting the JSON path 5800 while isinstance(path, exp.Cast): 5801 casts.append(path.to) 5802 path = path.this 5803 5804 if casts: 5805 dcolon_offset = next( 5806 i 5807 for i, t in enumerate(self._tokens[start_index:]) 5808 if t.token_type == TokenType.DCOLON 5809 ) 5810 end_token = self._tokens[start_index + dcolon_offset - 1] 5811 else: 5812 end_token = self._prev 5813 5814 if path: 5815 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5816 # it'll roundtrip to a string literal in GET_PATH 5817 if isinstance(path, exp.Identifier) and path.quoted: 5818 escape = True 5819 5820 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5821 5822 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5823 # Databricks transforms it back to the colon/dot notation 5824 if json_path: 5825 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5826 5827 if json_path_expr: 5828 json_path_expr.set("escape", escape) 5829 5830 this = self.expression( 5831 exp.JSONExtract, 5832 this=this, 5833 expression=json_path_expr, 5834 variant_extract=True, 5835 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5836 ) 5837 5838 while casts: 5839 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5840 5841 return this 5842 5843 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5844 return self._parse_types() 5845 5846 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5847 this = self._parse_bracket(this) 5848 5849 while self._match_set(self.COLUMN_OPERATORS): 5850 op_token = self._prev.token_type 5851 op = self.COLUMN_OPERATORS.get(op_token) 5852 5853 if op_token in self.CAST_COLUMN_OPERATORS: 5854 field = self._parse_dcolon() 5855 if not field: 5856 self.raise_error("Expected type") 5857 elif op and self._curr: 5858 field = self._parse_column_reference() or self._parse_bitwise() 5859 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5860 field = self._parse_column_ops(field) 5861 else: 5862 field = self._parse_field(any_token=True, anonymous_func=True) 5863 5864 # Function calls can be qualified, e.g., x.y.FOO() 5865 # This converts the final AST to a series of Dots leading to the function call 5866 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5867 if isinstance(field, (exp.Func, exp.Window)) and this: 5868 this = this.transform( 5869 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5870 ) 5871 5872 if op: 5873 this = op(self, this, field) 5874 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5875 this = self.expression( 5876 exp.Column, 5877 comments=this.comments, 5878 this=field, 5879 table=this.this, 5880 db=this.args.get("table"), 5881 catalog=this.args.get("db"), 5882 ) 5883 elif isinstance(field, exp.Window): 5884 # Move the exp.Dot's to the window's function 5885 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5886 field.set("this", window_func) 5887 this = field 5888 else: 5889 this = self.expression(exp.Dot, this=this, expression=field) 5890 5891 if field and field.comments: 5892 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5893 5894 this = self._parse_bracket(this) 5895 5896 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5897 5898 def _parse_paren(self) -> t.Optional[exp.Expression]: 5899 if not self._match(TokenType.L_PAREN): 5900 return None 5901 5902 comments = self._prev_comments 5903 query = self._parse_select() 5904 5905 if query: 5906 expressions = [query] 5907 else: 5908 expressions = self._parse_expressions() 5909 5910 this = seq_get(expressions, 0) 5911 5912 if not this and self._match(TokenType.R_PAREN, advance=False): 5913 this = self.expression(exp.Tuple) 5914 elif isinstance(this, exp.UNWRAPPED_QUERIES): 5915 this = 
self._parse_subquery(this=this, parse_alias=False) 5916 elif isinstance(this, (exp.Subquery, exp.Values)): 5917 this = self._parse_subquery( 5918 this=self._parse_query_modifiers(self._parse_set_operations(this)), 5919 parse_alias=False, 5920 ) 5921 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5922 this = self.expression(exp.Tuple, expressions=expressions) 5923 else: 5924 this = self.expression(exp.Paren, this=this) 5925 5926 if this: 5927 this.add_comments(comments) 5928 5929 self._match_r_paren(expression=this) 5930 5931 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 5932 return self._parse_window(this) 5933 5934 return this 5935 5936 def _parse_primary(self) -> t.Optional[exp.Expression]: 5937 if self._match_set(self.PRIMARY_PARSERS): 5938 token_type = self._prev.token_type 5939 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5940 5941 if token_type == TokenType.STRING: 5942 expressions = [primary] 5943 while self._match(TokenType.STRING): 5944 expressions.append(exp.Literal.string(self._prev.text)) 5945 5946 if len(expressions) > 1: 5947 return self.expression( 5948 exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE 5949 ) 5950 5951 return primary 5952 5953 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5954 return exp.Literal.number(f"0.{self._prev.text}") 5955 5956 return self._parse_paren() 5957 5958 def _parse_field( 5959 self, 5960 any_token: bool = False, 5961 tokens: t.Optional[t.Collection[TokenType]] = None, 5962 anonymous_func: bool = False, 5963 ) -> t.Optional[exp.Expression]: 5964 if anonymous_func: 5965 field = ( 5966 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5967 or self._parse_primary() 5968 ) 5969 else: 5970 field = self._parse_primary() or self._parse_function( 5971 anonymous=anonymous_func, any_token=any_token 5972 ) 5973 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5974 5975 def _parse_function( 5976 self, 5977 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5978 anonymous: bool = False, 5979 optional_parens: bool = True, 5980 any_token: bool = False, 5981 ) -> t.Optional[exp.Expression]: 5982 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5983 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5984 fn_syntax = False 5985 if ( 5986 self._match(TokenType.L_BRACE, advance=False) 5987 and self._next 5988 and self._next.text.upper() == "FN" 5989 ): 5990 self._advance(2) 5991 fn_syntax = True 5992 5993 func = self._parse_function_call( 5994 functions=functions, 5995 anonymous=anonymous, 5996 optional_parens=optional_parens, 5997 any_token=any_token, 5998 ) 5999 6000 if fn_syntax: 6001 self._match(TokenType.R_BRACE) 6002 6003 return func 6004 6005 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 6006 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6007 6008 def _parse_function_call( 6009 self, 6010 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6011 anonymous: bool = False, 6012 optional_parens: bool = True, 6013 any_token: bool = False, 6014 ) -> t.Optional[exp.Expression]: 6015 if not self._curr: 6016 return None 6017 6018 comments = self._curr.comments 6019 prev = self._prev 6020 token = self._curr 6021 token_type = self._curr.token_type 6022 this = self._curr.text 6023 upper = this.upper() 6024 6025 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6026 if optional_parens and parser and token_type not in 
self.INVALID_FUNC_NAME_TOKENS: 6027 self._advance() 6028 return self._parse_window(parser(self)) 6029 6030 if not self._next or self._next.token_type != TokenType.L_PAREN: 6031 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 6032 self._advance() 6033 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 6034 6035 return None 6036 6037 if any_token: 6038 if token_type in self.RESERVED_TOKENS: 6039 return None 6040 elif token_type not in self.FUNC_TOKENS: 6041 return None 6042 6043 self._advance(2) 6044 6045 parser = self.FUNCTION_PARSERS.get(upper) 6046 if parser and not anonymous: 6047 this = parser(self) 6048 else: 6049 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6050 6051 if subquery_predicate: 6052 expr = None 6053 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 6054 expr = self._parse_select() 6055 self._match_r_paren() 6056 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6057 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6058 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6059 self._advance(-1) 6060 expr = self._parse_bitwise() 6061 6062 if expr: 6063 return self.expression(subquery_predicate, comments=comments, this=expr) 6064 6065 if functions is None: 6066 functions = self.FUNCTIONS 6067 6068 function = functions.get(upper) 6069 known_function = function and not anonymous 6070 6071 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6072 args = self._parse_function_args(alias) 6073 6074 post_func_comments = self._curr and self._curr.comments 6075 if known_function and post_func_comments: 6076 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6077 # call we'll construct it as exp.Anonymous, even if it's "known" 6078 if any( 6079 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6080 for comment in post_func_comments 6081 ): 6082 known_function = False 6083 6084 if alias and known_function: 6085 args = self._kv_to_prop_eq(args) 6086 6087 if known_function: 6088 func_builder = t.cast(t.Callable, function) 6089 6090 if "dialect" in func_builder.__code__.co_varnames: 6091 func = func_builder(args, dialect=self.dialect) 6092 else: 6093 func = func_builder(args) 6094 6095 func = self.validate_expression(func, args) 6096 if self.dialect.PRESERVE_ORIGINAL_NAMES: 6097 func.meta["name"] = this 6098 6099 this = func 6100 else: 6101 if token_type == TokenType.IDENTIFIER: 6102 this = exp.Identifier(this=this, quoted=True).update_positions(token) 6103 6104 this = self.expression(exp.Anonymous, this=this, expressions=args) 6105 6106 this = this.update_positions(token) 6107 6108 if isinstance(this, exp.Expression): 6109 this.add_comments(comments) 6110 6111 self._match_r_paren(this) 6112 return self._parse_window(this) 6113 6114 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 6115 return expression 6116 6117 def _kv_to_prop_eq( 6118 self, expressions: t.List[exp.Expression], parse_map: bool = False 6119 ) -> t.List[exp.Expression]: 6120 transformed = [] 6121 6122 for index, e in enumerate(expressions): 6123 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6124 if isinstance(e, exp.Alias): 6125 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6126 6127 if not isinstance(e, exp.PropertyEQ): 6128 e = self.expression( 6129 exp.PropertyEQ, 6130 this=e.this if parse_map else exp.to_identifier(e.this.name), 6131 expression=e.expression, 6132 ) 6133 6134 
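# A key that was parsed as a single-part Column (e.g. the `a` in `a := 1`) is unwrapped
# to its underlying Identifier, since PropertyEQ keys are identifiers rather than columns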
if isinstance(e.this, exp.Column): 6135 e.this.replace(e.this.this) 6136 else: 6137 e = self._to_prop_eq(e, index) 6138 6139 transformed.append(e) 6140 6141 return transformed 6142 6143 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6144 return self._parse_statement() 6145 6146 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6147 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6148 6149 def _parse_user_defined_function( 6150 self, kind: t.Optional[TokenType] = None 6151 ) -> t.Optional[exp.Expression]: 6152 this = self._parse_table_parts(schema=True) 6153 6154 if not self._match(TokenType.L_PAREN): 6155 return this 6156 6157 expressions = self._parse_csv(self._parse_function_parameter) 6158 self._match_r_paren() 6159 return self.expression( 6160 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6161 ) 6162 6163 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6164 literal = self._parse_primary() 6165 if literal: 6166 return self.expression(exp.Introducer, token=token, expression=literal) 6167 6168 return self._identifier_expression(token) 6169 6170 def _parse_session_parameter(self) -> exp.SessionParameter: 6171 kind = None 6172 this = self._parse_id_var() or self._parse_primary() 6173 6174 if this and self._match(TokenType.DOT): 6175 kind = this.name 6176 this = self._parse_var() or self._parse_primary() 6177 6178 return self.expression(exp.SessionParameter, this=this, kind=kind) 6179 6180 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6181 return self._parse_id_var() 6182 6183 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6184 index = self._index 6185 6186 if self._match(TokenType.L_PAREN): 6187 expressions = t.cast( 6188 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6189 ) 6190 6191 if not self._match(TokenType.R_PAREN): 6192 self._retreat(index) 6193 else: 6194 expressions = [self._parse_lambda_arg()] 6195 6196 if self._match_set(self.LAMBDAS): 6197 return self.LAMBDAS[self._prev.token_type](self, expressions) 6198 6199 self._retreat(index) 6200 6201 this: t.Optional[exp.Expression] 6202 6203 if self._match(TokenType.DISTINCT): 6204 this = self.expression( 6205 exp.Distinct, expressions=self._parse_csv(self._parse_disjunction) 6206 ) 6207 else: 6208 this = self._parse_select_or_expression(alias=alias) 6209 6210 return self._parse_limit( 6211 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6212 ) 6213 6214 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6215 index = self._index 6216 if not self._match(TokenType.L_PAREN): 6217 return this 6218 6219 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6220 # expr can be of both types 6221 if self._match_set(self.SELECT_START_TOKENS): 6222 self._retreat(index) 6223 return this 6224 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6225 self._match_r_paren() 6226 return self.expression(exp.Schema, this=this, expressions=args) 6227 6228 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6229 return self._parse_column_def(self._parse_field(any_token=True)) 6230 6231 def _parse_column_def( 6232 self, this: t.Optional[exp.Expression], computed_column: bool = True 6233 ) -> t.Optional[exp.Expression]: 6234 # column defs are not really columns, they're identifiers 6235 if isinstance(this, exp.Column): 6236 this = this.this 6237 6238 if not computed_column: 6239 self._match(TokenType.ALIAS) 6240 6241 kind = self._parse_types(schema=True) 6242 6243 if self._match_text_seq("FOR", "ORDINALITY"): 6244 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6245 6246 constraints: t.List[exp.Expression] = [] 6247 6248 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6249 ("ALIAS", "MATERIALIZED") 6250 ): 6251 persisted = self._prev.text.upper() == "MATERIALIZED" 6252 constraint_kind = exp.ComputedColumnConstraint( 6253 this=self._parse_disjunction(), 6254 persisted=persisted or self._match_text_seq("PERSISTED"), 6255 data_type=exp.Var(this="AUTO") 6256 if self._match_text_seq("AUTO") 6257 else self._parse_types(), 6258 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6259 ) 6260 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6261 elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False): 6262 in_out_constraint = self.expression( 6263 exp.InOutColumnConstraint, 6264 input_=self._match(TokenType.IN), 6265 output=self._match(TokenType.OUT), 6266 ) 6267 constraints.append(in_out_constraint) 6268 kind = self._parse_types() 6269 elif ( 6270 kind 6271 and self._match(TokenType.ALIAS, advance=False) 6272 and ( 6273 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6274 or (self._next and self._next.token_type == TokenType.L_PAREN) 6275 ) 6276 ): 6277 self._advance() 6278 constraints.append( 6279 self.expression( 6280 exp.ColumnConstraint, 6281 kind=exp.ComputedColumnConstraint( 6282 this=self._parse_disjunction(), 6283 persisted=self._match_texts(("STORED", "VIRTUAL")) 6284 and self._prev.text.upper() == "STORED", 6285 ), 6286 ) 6287 ) 6288 6289 while True: 6290 constraint = self._parse_column_constraint() 6291 if not constraint: 6292 break 6293 constraints.append(constraint) 6294 6295 if not kind and not constraints: 6296 return this 6297 6298 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6299 6300 def _parse_auto_increment( 6301 self, 6302 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6303 start = None 6304 increment = None 6305 order = None 6306 6307 if self._match(TokenType.L_PAREN, advance=False): 6308 args = self._parse_wrapped_csv(self._parse_bitwise) 6309 start = seq_get(args, 0) 6310 increment = seq_get(args, 1) 6311 elif self._match_text_seq("START"): 6312 start = self._parse_bitwise() 6313 self._match_text_seq("INCREMENT") 6314 increment = self._parse_bitwise() 6315 if self._match_text_seq("ORDER"): 6316 order = True 6317 elif self._match_text_seq("NOORDER"): 6318 order = False 6319 6320 if start and increment: 6321 return exp.GeneratedAsIdentityColumnConstraint( 6322 start=start, increment=increment, this=False, 
order=order 6323 ) 6324 6325 return exp.AutoIncrementColumnConstraint() 6326 6327 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6328 if not self._match_text_seq("REFRESH"): 6329 self._retreat(self._index - 1) 6330 return None 6331 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6332 6333 def _parse_compress(self) -> exp.CompressColumnConstraint: 6334 if self._match(TokenType.L_PAREN, advance=False): 6335 return self.expression( 6336 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6337 ) 6338 6339 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6340 6341 def _parse_generated_as_identity( 6342 self, 6343 ) -> ( 6344 exp.GeneratedAsIdentityColumnConstraint 6345 | exp.ComputedColumnConstraint 6346 | exp.GeneratedAsRowColumnConstraint 6347 ): 6348 if self._match_text_seq("BY", "DEFAULT"): 6349 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6350 this = self.expression( 6351 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6352 ) 6353 else: 6354 self._match_text_seq("ALWAYS") 6355 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6356 6357 self._match(TokenType.ALIAS) 6358 6359 if self._match_text_seq("ROW"): 6360 start = self._match_text_seq("START") 6361 if not start: 6362 self._match(TokenType.END) 6363 hidden = self._match_text_seq("HIDDEN") 6364 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6365 6366 identity = self._match_text_seq("IDENTITY") 6367 6368 if self._match(TokenType.L_PAREN): 6369 if self._match(TokenType.START_WITH): 6370 this.set("start", self._parse_bitwise()) 6371 if self._match_text_seq("INCREMENT", "BY"): 6372 this.set("increment", self._parse_bitwise()) 6373 if self._match_text_seq("MINVALUE"): 6374 this.set("minvalue", self._parse_bitwise()) 6375 if self._match_text_seq("MAXVALUE"): 6376 this.set("maxvalue", self._parse_bitwise()) 6377 6378 if self._match_text_seq("CYCLE"): 6379 this.set("cycle", True) 6380 elif self._match_text_seq("NO", "CYCLE"): 6381 this.set("cycle", False) 6382 6383 if not identity: 6384 this.set("expression", self._parse_range()) 6385 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6386 args = self._parse_csv(self._parse_bitwise) 6387 this.set("start", seq_get(args, 0)) 6388 this.set("increment", seq_get(args, 1)) 6389 6390 self._match_r_paren() 6391 6392 return this 6393 6394 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6395 self._match_text_seq("LENGTH") 6396 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6397 6398 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6399 if self._match_text_seq("NULL"): 6400 return self.expression(exp.NotNullColumnConstraint) 6401 if self._match_text_seq("CASESPECIFIC"): 6402 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6403 if self._match_text_seq("FOR", "REPLICATION"): 6404 return self.expression(exp.NotForReplicationColumnConstraint) 6405 6406 # Unconsume the `NOT` token 6407 self._retreat(self._index - 1) 6408 return None 6409 6410 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6411 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6412 6413 procedure_option_follows = ( 6414 self._match(TokenType.WITH, advance=False) 6415 and self._next 6416 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6417 ) 6418 6419 if not 
procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6420 return self.expression( 6421 exp.ColumnConstraint, 6422 this=this, 6423 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6424 ) 6425 6426 return this 6427 6428 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6429 if not self._match(TokenType.CONSTRAINT): 6430 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6431 6432 return self.expression( 6433 exp.Constraint, 6434 this=self._parse_id_var(), 6435 expressions=self._parse_unnamed_constraints(), 6436 ) 6437 6438 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6439 constraints = [] 6440 while True: 6441 constraint = self._parse_unnamed_constraint() or self._parse_function() 6442 if not constraint: 6443 break 6444 constraints.append(constraint) 6445 6446 return constraints 6447 6448 def _parse_unnamed_constraint( 6449 self, constraints: t.Optional[t.Collection[str]] = None 6450 ) -> t.Optional[exp.Expression]: 6451 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6452 constraints or self.CONSTRAINT_PARSERS 6453 ): 6454 return None 6455 6456 constraint = self._prev.text.upper() 6457 if constraint not in self.CONSTRAINT_PARSERS: 6458 self.raise_error(f"No parser found for schema constraint {constraint}.") 6459 6460 return self.CONSTRAINT_PARSERS[constraint](self) 6461 6462 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6463 return self._parse_id_var(any_token=False) 6464 6465 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6466 self._match_texts(("KEY", "INDEX")) 6467 return self.expression( 6468 exp.UniqueColumnConstraint, 6469 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6470 this=self._parse_schema(self._parse_unique_key()), 6471 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6472 on_conflict=self._parse_on_conflict(), 6473 options=self._parse_key_constraint_options(), 6474 ) 6475 6476 def _parse_key_constraint_options(self) -> t.List[str]: 6477 options = [] 6478 while True: 6479 if not self._curr: 6480 break 6481 6482 if self._match(TokenType.ON): 6483 action = None 6484 on = self._advance_any() and self._prev.text 6485 6486 if self._match_text_seq("NO", "ACTION"): 6487 action = "NO ACTION" 6488 elif self._match_text_seq("CASCADE"): 6489 action = "CASCADE" 6490 elif self._match_text_seq("RESTRICT"): 6491 action = "RESTRICT" 6492 elif self._match_pair(TokenType.SET, TokenType.NULL): 6493 action = "SET NULL" 6494 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6495 action = "SET DEFAULT" 6496 else: 6497 self.raise_error("Invalid key constraint") 6498 6499 options.append(f"ON {on} {action}") 6500 else: 6501 var = self._parse_var_from_options( 6502 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6503 ) 6504 if not var: 6505 break 6506 options.append(var.name) 6507 6508 return options 6509 6510 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6511 if match and not self._match(TokenType.REFERENCES): 6512 return None 6513 6514 expressions = None 6515 this = self._parse_table(schema=True) 6516 options = self._parse_key_constraint_options() 6517 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6518 6519 def _parse_foreign_key(self) -> exp.ForeignKey: 6520 expressions = ( 6521 self._parse_wrapped_id_vars() 6522 if not self._match(TokenType.REFERENCES, advance=False) 6523 else None 6524 ) 6525 reference = 
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        this = None
        if (
            self._curr.text.upper() not in self.CONSTRAINT_PARSERS
            and self._next
            and self._next.token_type == TokenType.L_PAREN
        ):
            this = self._parse_id_var()

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            this=this,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True))
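    # Editorial example (added; not part of the original source): a table-level
    # composite key takes the exp.PrimaryKey branch above, while a column-level
    # PRIMARY KEY becomes exp.PrimaryKeyColumnConstraint:
    #
    #   import sqlglot
    #   sqlglot.parse_one("CREATE TABLE t (a INT, b INT, PRIMARY KEY (a, b))")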
    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.COLON):
            return this

        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
            self._advance()
            end: t.Optional[exp.Expression] = -exp.Literal.number("1")
        else:
            end = self._parse_unary()
        step = self._parse_unary() if self._match(TokenType.COLON) else None
        return self.expression(exp.Slice, this=this, expression=end, step=step)
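    # Editorial example (added; not part of the original source): _parse_bracket and
    # _parse_slice cooperate on subscripts, so both plain indexing and slicing parse,
    # yielding an exp.Bracket for the first statement and a nested exp.Slice for the
    # second:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT arr[1] FROM t")
    #   sqlglot.parse_one("SELECT arr[1:3] FROM t", read="duckdb")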
    def _parse_case(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_disjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_disjunction()
            self._match(TokenType.THEN)
            then = self._parse_disjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_disjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_disjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_disjunction()
            false = self._parse_disjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_char(self) -> exp.Chr:
        return self.expression(
            exp.Chr,
            expressions=self._parse_csv(self._parse_assignment),
            charset=self._match(TokenType.USING) and self._parse_var(),
        )
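    # Editorial example (added; not part of the original source): the FORMAT branch
    # of _parse_cast below canonicalizes Teradata-style format casts into
    # StrToDate/StrToTime nodes, e.g. (assuming the Teradata dialect):
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT CAST(x AS DATE FORMAT 'YYYY-MM-DD') FROM t", read="teradata")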
    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_disjunction()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_disjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_disjunction))
        else:
            args = self._parse_csv(self._parse_disjunction)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY})
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_element(self) -> exp.XMLElement:
        if self._match_text_seq("EVALNAME"):
            evalname = True
            this = self._parse_bitwise()
        else:
            evalname = None
            self._match_text_seq("NAME")
            this = self._parse_id_var()

        return self.expression(
            exp.XMLElement,
            this=this,
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise),
            evalname=evalname,
        )

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_disjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )
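    # Editorial example (added; not part of the original source): _parse_json_object
    # above covers the standard constructor clauses, so something like the following
    # should parse, with null_handling and unique_keys populated accordingly:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT JSON_OBJECT(KEY 'a' VALUE 1 ABSENT ON NULL WITH UNIQUE KEYS)")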
    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY)
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            ordinality = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
            ordinality=ordinality,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)
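    # Editorial example (added; not part of the original source): both argument
    # orders reach exp.StrPosition via _parse_position above, e.g.:
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT POSITION('x' IN col) FROM t")
    #   sqlglot.parse_one("SELECT STRPOS(col, 'x') FROM t", read="postgres")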
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # (despite being undocumented, the reverse order also works)
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        start, length = None, None

        while self._curr:
            if self._match(TokenType.FROM):
                start = self._parse_bitwise()
            elif self._match(TokenType.FOR):
                if not start:
                    start = exp.Literal.number(1)
                length = self._parse_bitwise()
            else:
                break

        if start:
            args.append(start)
        if length:
            args.append(length)

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
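    # Editorial example (added; not part of the original source): _parse_window above
    # handles frames, named windows and the IGNORE/RESPECT NULLS placements, e.g.:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    #       "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
    #   )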
    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_disjunction(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
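    # Editorial note (added; not part of the original source): _parse_csv and
    # _parse_tokens are the workhorse combinators of this parser; binary-operator
    # tiers elsewhere in this file are typically defined in terms of them, roughly:
    #
    #   def _parse_equality(self) -> t.Optional[exp.Expression]:
    #       return self._parse_tokens(self._parse_comparison, self.EQUALITY)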
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh | exp.Command:
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match_text_seq("MATERIALIZED", "VIEW"):
            kind = "MATERIALIZED VIEW"
        else:
            kind = ""

        this = self._parse_string() or self._parse_table()
        if not kind and not isinstance(this, exp.Literal):
            return self._parse_as_command(self._prev)

        return self.expression(exp.Refresh, this=this, kind=kind)
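    # Editorial example (added; not part of the original source): transaction
    # statements map onto dedicated nodes, e.g. BEGIN should produce an
    # exp.Transaction and COMMIT AND CHAIN an exp.Commit with chain=True:
    #
    #   import sqlglot
    #   sqlglot.parse_one("BEGIN")
    #   sqlglot.parse_one("COMMIT AND CHAIN")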
    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_disjunction(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))
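    # Editorial example (added; not part of the original source): column renames
    # become exp.RenameColumn and table renames exp.AlterRename, e.g.:
    #
    #   import sqlglot
    #   sqlglot.parse_one("ALTER TABLE t RENAME COLUMN a TO b")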
    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)
            cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                    cascade=cascade,
                )

        return self._parse_as_command(start)
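    # Editorial example (added; not part of the original source): session-level
    # ALTER statements are routed through _parse_alter_session above, e.g.
    # (assuming the Snowflake dialect):
    #
    #   import sqlglot
    #   sqlglot.parse_one("ALTER SESSION SET QUERY_TAG = 'etl'", read="snowflake")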
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)
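    # Editorial example (added; not part of the original source): the ANALYZE
    # machinery above is shared across dialects, e.g. (assuming the Spark dialect):
    #
    #   import sqlglot
    #   sqlglot.parse_one("ANALYZE TABLE t COMPUTE STATISTICS", read="spark")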
    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=self._match(TokenType.ON) and self._parse_disjunction(),
            using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(),
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
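    # Editorial example (added; not part of the original source): a full MERGE
    # statement exercises _parse_merge above and _parse_when_matched below:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "MERGE INTO t USING s ON t.id = s.id "
    #       "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
    #   )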
expressions = self._parse_star() 8161 if expressions: 8162 then = self.expression(exp.Update, expressions=expressions) 8163 else: 8164 then = self.expression( 8165 exp.Update, 8166 expressions=self._match(TokenType.SET) 8167 and self._parse_csv(self._parse_equality), 8168 ) 8169 elif self._match(TokenType.DELETE): 8170 then = self.expression(exp.Var, this=self._prev.text) 8171 else: 8172 then = self._parse_var_from_options(self.CONFLICT_ACTIONS) 8173 8174 whens.append( 8175 self.expression( 8176 exp.When, 8177 matched=matched, 8178 source=source, 8179 condition=condition, 8180 then=then, 8181 ) 8182 ) 8183 return self.expression(exp.Whens, expressions=whens) 8184 8185 def _parse_show(self) -> t.Optional[exp.Expression]: 8186 parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE) 8187 if parser: 8188 return parser(self) 8189 return self._parse_as_command(self._prev) 8190 8191 def _parse_set_item_assignment( 8192 self, kind: t.Optional[str] = None 8193 ) -> t.Optional[exp.Expression]: 8194 index = self._index 8195 8196 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8197 return self._parse_set_transaction(global_=kind == "GLOBAL") 8198 8199 left = self._parse_primary() or self._parse_column() 8200 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8201 8202 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8203 self._retreat(index) 8204 return None 8205 8206 right = self._parse_statement() or self._parse_id_var() 8207 if isinstance(right, (exp.Column, exp.Identifier)): 8208 right = exp.var(right.name) 8209 8210 this = self.expression(exp.EQ, this=left, expression=right) 8211 return self.expression(exp.SetItem, this=this, kind=kind) 8212 8213 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8214 self._match_text_seq("TRANSACTION") 8215 characteristics = self._parse_csv( 8216 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8217 ) 8218 return self.expression( 8219 exp.SetItem, 8220 expressions=characteristics, 8221 kind="TRANSACTION", 8222 global_=global_, 8223 ) 8224 8225 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8226 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8227 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8228 8229 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8230 index = self._index 8231 set_ = self.expression( 8232 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8233 ) 8234 8235 if self._curr: 8236 self._retreat(index) 8237 return self._parse_as_command(self._prev) 8238 8239 return set_ 8240 8241 def _parse_var_from_options( 8242 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8243 ) -> t.Optional[exp.Var]: 8244 start = self._curr 8245 if not start: 8246 return None 8247 8248 option = start.text.upper() 8249 continuations = options.get(option) 8250 8251 index = self._index 8252 self._advance() 8253 for keywords in continuations or []: 8254 if isinstance(keywords, str): 8255 keywords = (keywords,) 8256 8257 if self._match_text_seq(*keywords): 8258 option = f"{option} {' '.join(keywords)}" 8259 break 8260 else: 8261 if continuations or continuations is None: 8262 if raise_unmatched: 8263 self.raise_error(f"Unknown option {option}") 8264 8265 self._retreat(index) 8266 return None 8267 8268 return exp.var(option) 8269 8270 def _parse_as_command(self, start: Token) -> exp.Command: 8271 while self._curr: 8272 
self._advance() 8273 text = self._find_sql(start, self._prev) 8274 size = len(start.text) 8275 self._warn_unsupported() 8276 return exp.Command(this=text[:size], expression=text[size:]) 8277 8278 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8279 settings = [] 8280 8281 self._match_l_paren() 8282 kind = self._parse_id_var() 8283 8284 if self._match(TokenType.L_PAREN): 8285 while True: 8286 key = self._parse_id_var() 8287 value = self._parse_primary() 8288 if not key and value is None: 8289 break 8290 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8291 self._match(TokenType.R_PAREN) 8292 8293 self._match_r_paren() 8294 8295 return self.expression( 8296 exp.DictProperty, 8297 this=this, 8298 kind=kind.this if kind else None, 8299 settings=settings, 8300 ) 8301 8302 def _parse_dict_range(self, this: str) -> exp.DictRange: 8303 self._match_l_paren() 8304 has_min = self._match_text_seq("MIN") 8305 if has_min: 8306 min = self._parse_var() or self._parse_primary() 8307 self._match_text_seq("MAX") 8308 max = self._parse_var() or self._parse_primary() 8309 else: 8310 max = self._parse_var() or self._parse_primary() 8311 min = exp.Literal.number(0) 8312 self._match_r_paren() 8313 return self.expression(exp.DictRange, this=this, min=min, max=max) 8314 8315 def _parse_comprehension( 8316 self, this: t.Optional[exp.Expression] 8317 ) -> t.Optional[exp.Comprehension]: 8318 index = self._index 8319 expression = self._parse_column() 8320 position = self._match(TokenType.COMMA) and self._parse_column() 8321 8322 if not self._match(TokenType.IN): 8323 self._retreat(index - 1) 8324 return None 8325 iterator = self._parse_column() 8326 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 8327 return self.expression( 8328 exp.Comprehension, 8329 this=this, 8330 expression=expression, 8331 position=position, 8332 iterator=iterator, 8333 condition=condition, 8334 ) 8335 8336 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8337 if self._match(TokenType.HEREDOC_STRING): 8338 return self.expression(exp.Heredoc, this=self._prev.text) 8339 8340 if not self._match_text_seq("$"): 8341 return None 8342 8343 tags = ["$"] 8344 tag_text = None 8345 8346 if self._is_connected(): 8347 self._advance() 8348 tags.append(self._prev.text.upper()) 8349 else: 8350 self.raise_error("No closing $ found") 8351 8352 if tags[-1] != "$": 8353 if self._is_connected() and self._match_text_seq("$"): 8354 tag_text = tags[-1] 8355 tags.append("$") 8356 else: 8357 self.raise_error("No closing $ found") 8358 8359 heredoc_start = self._curr 8360 8361 while self._curr: 8362 if self._match_text_seq(*tags, advance=False): 8363 this = self._find_sql(heredoc_start, self._prev) 8364 self._advance(len(tags)) 8365 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8366 8367 self._advance() 8368 8369 self.raise_error(f"No closing {''.join(tags)} found") 8370 return None 8371 8372 def _find_parser( 8373 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8374 ) -> t.Optional[t.Callable]: 8375 if not self._curr: 8376 return None 8377 8378 index = self._index 8379 this = [] 8380 while True: 8381 # The current token might be multiple words 8382 curr = self._curr.text.upper() 8383 key = curr.split(" ") 8384 this.append(curr) 8385 8386 self._advance() 8387 result, trie = in_trie(trie, key) 8388 if result == TrieResult.FAILED: 8389 break 8390 8391 if result == TrieResult.EXISTS: 8392 subparser = parsers[" ".join(this)] 8393 return subparser 8394 8395 self._retreat(index) 
8396 return None 8397 8398 def _match(self, token_type, advance=True, expression=None): 8399 if not self._curr: 8400 return None 8401 8402 if self._curr.token_type == token_type: 8403 if advance: 8404 self._advance() 8405 self._add_comments(expression) 8406 return True 8407 8408 return None 8409 8410 def _match_set(self, types, advance=True): 8411 if not self._curr: 8412 return None 8413 8414 if self._curr.token_type in types: 8415 if advance: 8416 self._advance() 8417 return True 8418 8419 return None 8420 8421 def _match_pair(self, token_type_a, token_type_b, advance=True): 8422 if not self._curr or not self._next: 8423 return None 8424 8425 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8426 if advance: 8427 self._advance(2) 8428 return True 8429 8430 return None 8431 8432 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8433 if not self._match(TokenType.L_PAREN, expression=expression): 8434 self.raise_error("Expecting (") 8435 8436 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8437 if not self._match(TokenType.R_PAREN, expression=expression): 8438 self.raise_error("Expecting )") 8439 8440 def _match_texts(self, texts, advance=True): 8441 if ( 8442 self._curr 8443 and self._curr.token_type != TokenType.STRING 8444 and self._curr.text.upper() in texts 8445 ): 8446 if advance: 8447 self._advance() 8448 return True 8449 return None 8450 8451 def _match_text_seq(self, *texts, advance=True): 8452 index = self._index 8453 for text in texts: 8454 if ( 8455 self._curr 8456 and self._curr.token_type != TokenType.STRING 8457 and self._curr.text.upper() == text 8458 ): 8459 self._advance() 8460 else: 8461 self._retreat(index) 8462 return None 8463 8464 if not advance: 8465 self._retreat(index) 8466 8467 return True 8468 8469 def _replace_lambda( 8470 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8471 ) -> t.Optional[exp.Expression]: 8472 if not node: 8473 return node 8474 8475 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8476 8477 for column in node.find_all(exp.Column): 8478 typ = lambda_types.get(column.parts[0].name) 8479 if typ is not None: 8480 dot_or_id = column.to_dot() if column.table else column.this 8481 8482 if typ: 8483 dot_or_id = self.expression( 8484 exp.Cast, 8485 this=dot_or_id, 8486 to=typ, 8487 ) 8488 8489 parent = column.parent 8490 8491 while isinstance(parent, exp.Dot): 8492 if not isinstance(parent.parent, exp.Dot): 8493 parent.replace(dot_or_id) 8494 break 8495 parent = parent.parent 8496 else: 8497 if column is node: 8498 node = dot_or_id 8499 else: 8500 column.replace(dot_or_id) 8501 return node 8502 8503 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8504 start = self._prev 8505 8506 # Not to be confused with TRUNCATE(number, decimals) function call 8507 if self._match(TokenType.L_PAREN): 8508 self._retreat(self._index - 2) 8509 return self._parse_function() 8510 8511 # Clickhouse supports TRUNCATE DATABASE as well 8512 is_database = self._match(TokenType.DATABASE) 8513 8514 self._match(TokenType.TABLE) 8515 8516 exists = self._parse_exists(not_=False) 8517 8518 expressions = self._parse_csv( 8519 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8520 ) 8521 8522 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8523 8524 if self._match_text_seq("RESTART", "IDENTITY"): 8525 identity = "RESTART" 8526 elif 
self._match_text_seq("CONTINUE", "IDENTITY"): 8527 identity = "CONTINUE" 8528 else: 8529 identity = None 8530 8531 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8532 option = self._prev.text 8533 else: 8534 option = None 8535 8536 partition = self._parse_partition() 8537 8538 # Fallback case 8539 if self._curr: 8540 return self._parse_as_command(start) 8541 8542 return self.expression( 8543 exp.TruncateTable, 8544 expressions=expressions, 8545 is_database=is_database, 8546 exists=exists, 8547 cluster=cluster, 8548 identity=identity, 8549 option=option, 8550 partition=partition, 8551 ) 8552 8553 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8554 this = self._parse_ordered(self._parse_opclass) 8555 8556 if not self._match(TokenType.WITH): 8557 return this 8558 8559 op = self._parse_var(any_token=True) 8560 8561 return self.expression(exp.WithOperator, this=this, op=op) 8562 8563 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8564 self._match(TokenType.EQ) 8565 self._match(TokenType.L_PAREN) 8566 8567 opts: t.List[t.Optional[exp.Expression]] = [] 8568 option: exp.Expression | None 8569 while self._curr and not self._match(TokenType.R_PAREN): 8570 if self._match_text_seq("FORMAT_NAME", "="): 8571 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8572 option = self._parse_format_name() 8573 else: 8574 option = self._parse_property() 8575 8576 if option is None: 8577 self.raise_error("Unable to parse option") 8578 break 8579 8580 opts.append(option) 8581 8582 return opts 8583 8584 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8585 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8586 8587 options = [] 8588 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8589 option = self._parse_var(any_token=True) 8590 prev = self._prev.text.upper() 8591 8592 # Different dialects might separate options and values by white space, "=" and "AS" 8593 self._match(TokenType.EQ) 8594 self._match(TokenType.ALIAS) 8595 8596 param = self.expression(exp.CopyParameter, this=option) 8597 8598 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8599 TokenType.L_PAREN, advance=False 8600 ): 8601 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8602 param.set("expressions", self._parse_wrapped_options()) 8603 elif prev == "FILE_FORMAT": 8604 # T-SQL's external file format case 8605 param.set("expression", self._parse_field()) 8606 elif ( 8607 prev == "FORMAT" 8608 and self._prev.token_type == TokenType.ALIAS 8609 and self._match_texts(("AVRO", "JSON")) 8610 ): 8611 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8612 param.set("expression", self._parse_field()) 8613 else: 8614 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8615 8616 options.append(param) 8617 self._match(sep) 8618 8619 return options 8620 8621 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8622 expr = self.expression(exp.Credentials) 8623 8624 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8625 expr.set("storage", self._parse_field()) 8626 if self._match_text_seq("CREDENTIALS"): 8627 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8628 creds = ( 8629 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8630 ) 8631 expr.set("credentials", creds) 8632 if self._match_text_seq("ENCRYPTION"): 8633 expr.set("encryption", self._parse_wrapped_options()) 8634 if 
self._match_text_seq("IAM_ROLE"): 8635 expr.set( 8636 "iam_role", 8637 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 8638 ) 8639 if self._match_text_seq("REGION"): 8640 expr.set("region", self._parse_field()) 8641 8642 return expr 8643 8644 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8645 return self._parse_field() 8646 8647 def _parse_copy(self) -> exp.Copy | exp.Command: 8648 start = self._prev 8649 8650 self._match(TokenType.INTO) 8651 8652 this = ( 8653 self._parse_select(nested=True, parse_subquery_alias=False) 8654 if self._match(TokenType.L_PAREN, advance=False) 8655 else self._parse_table(schema=True) 8656 ) 8657 8658 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8659 8660 files = self._parse_csv(self._parse_file_location) 8661 if self._match(TokenType.EQ, advance=False): 8662 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8663 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8664 # list via `_parse_wrapped(..)` below. 8665 self._advance(-1) 8666 files = [] 8667 8668 credentials = self._parse_credentials() 8669 8670 self._match_text_seq("WITH") 8671 8672 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8673 8674 # Fallback case 8675 if self._curr: 8676 return self._parse_as_command(start) 8677 8678 return self.expression( 8679 exp.Copy, 8680 this=this, 8681 kind=kind, 8682 credentials=credentials, 8683 files=files, 8684 params=params, 8685 ) 8686 8687 def _parse_normalize(self) -> exp.Normalize: 8688 return self.expression( 8689 exp.Normalize, 8690 this=self._parse_bitwise(), 8691 form=self._match(TokenType.COMMA) and self._parse_var(), 8692 ) 8693 8694 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8695 args = self._parse_csv(lambda: self._parse_lambda()) 8696 8697 this = seq_get(args, 0) 8698 decimals = seq_get(args, 1) 8699 8700 return expr_type( 8701 this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var() 8702 ) 8703 8704 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 8705 star_token = self._prev 8706 8707 if self._match_text_seq("COLUMNS", "(", advance=False): 8708 this = self._parse_function() 8709 if isinstance(this, exp.Columns): 8710 this.set("unpack", True) 8711 return this 8712 8713 return self.expression( 8714 exp.Star, 8715 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 8716 replace=self._parse_star_op("REPLACE"), 8717 rename=self._parse_star_op("RENAME"), 8718 ).update_positions(star_token) 8719 8720 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8721 privilege_parts = [] 8722 8723 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8724 # (end of privilege list) or L_PAREN (start of column list) are met 8725 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8726 privilege_parts.append(self._curr.text.upper()) 8727 self._advance() 8728 8729 this = exp.var(" ".join(privilege_parts)) 8730 expressions = ( 8731 self._parse_wrapped_csv(self._parse_column) 8732 if self._match(TokenType.L_PAREN, advance=False) 8733 else None 8734 ) 8735 8736 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8737 8738 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8739 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8740 principal = self._parse_id_var() 8741 8742 if not principal: 8743 return None 
8744 8745 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8746 8747 def _parse_grant_revoke_common( 8748 self, 8749 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8750 privileges = self._parse_csv(self._parse_grant_privilege) 8751 8752 self._match(TokenType.ON) 8753 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 8754 8755 # Attempt to parse the securable e.g. MySQL allows names 8756 # such as "foo.*", "*.*" which are not easily parseable yet 8757 securable = self._try_parse(self._parse_table_parts) 8758 8759 return privileges, kind, securable 8760 8761 def _parse_grant(self) -> exp.Grant | exp.Command: 8762 start = self._prev 8763 8764 privileges, kind, securable = self._parse_grant_revoke_common() 8765 8766 if not securable or not self._match_text_seq("TO"): 8767 return self._parse_as_command(start) 8768 8769 principals = self._parse_csv(self._parse_grant_principal) 8770 8771 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8772 8773 if self._curr: 8774 return self._parse_as_command(start) 8775 8776 return self.expression( 8777 exp.Grant, 8778 privileges=privileges, 8779 kind=kind, 8780 securable=securable, 8781 principals=principals, 8782 grant_option=grant_option, 8783 ) 8784 8785 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8786 start = self._prev 8787 8788 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8789 8790 privileges, kind, securable = self._parse_grant_revoke_common() 8791 8792 if not securable or not self._match_text_seq("FROM"): 8793 return self._parse_as_command(start) 8794 8795 principals = self._parse_csv(self._parse_grant_principal) 8796 8797 cascade = None 8798 if self._match_texts(("CASCADE", "RESTRICT")): 8799 cascade = self._prev.text.upper() 8800 8801 if self._curr: 8802 return self._parse_as_command(start) 8803 8804 return self.expression( 8805 exp.Revoke, 8806 privileges=privileges, 8807 kind=kind, 8808 securable=securable, 8809 principals=principals, 8810 grant_option=grant_option, 8811 cascade=cascade, 8812 ) 8813 8814 def _parse_overlay(self) -> exp.Overlay: 8815 def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]: 8816 return ( 8817 self._match(TokenType.COMMA) or self._match_text_seq(text) 8818 ) and self._parse_bitwise() 8819 8820 return self.expression( 8821 exp.Overlay, 8822 this=self._parse_bitwise(), 8823 expression=_parse_overlay_arg("PLACING"), 8824 from_=_parse_overlay_arg("FROM"), 8825 for_=_parse_overlay_arg("FOR"), 8826 ) 8827 8828 def _parse_format_name(self) -> exp.Property: 8829 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8830 # for FILE_FORMAT = <format_name> 8831 return self.expression( 8832 exp.Property, 8833 this=exp.var("FORMAT_NAME"), 8834 value=self._parse_string() or self._parse_table_parts(), 8835 ) 8836 8837 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8838 args: t.List[exp.Expression] = [] 8839 8840 if self._match(TokenType.DISTINCT): 8841 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 8842 self._match(TokenType.COMMA) 8843 8844 args.extend(self._parse_function_args()) 8845 8846 return self.expression( 8847 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8848 ) 8849 8850 def _identifier_expression( 8851 self, token: t.Optional[Token] = None, **kwargs: t.Any 8852 ) -> exp.Identifier: 8853 return self.expression(exp.Identifier, token=token or self._prev, **kwargs) 8854 8855 
def _build_pipe_cte( 8856 self, 8857 query: exp.Query, 8858 expressions: t.List[exp.Expression], 8859 alias_cte: t.Optional[exp.TableAlias] = None, 8860 ) -> exp.Select: 8861 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8862 if alias_cte: 8863 new_cte = alias_cte 8864 else: 8865 self._pipe_cte_counter += 1 8866 new_cte = f"__tmp{self._pipe_cte_counter}" 8867 8868 with_ = query.args.get("with_") 8869 ctes = with_.pop() if with_ else None 8870 8871 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8872 if ctes: 8873 new_select.set("with_", ctes) 8874 8875 return new_select.with_(new_cte, as_=query, copy=False) 8876 8877 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8878 select = self._parse_select(consume_pipe=False) 8879 if not select: 8880 return query 8881 8882 return self._build_pipe_cte( 8883 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8884 ) 8885 8886 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8887 limit = self._parse_limit() 8888 offset = self._parse_offset() 8889 if limit: 8890 curr_limit = query.args.get("limit", limit) 8891 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8892 query.limit(limit, copy=False) 8893 if offset: 8894 curr_offset = query.args.get("offset") 8895 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8896 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8897 8898 return query 8899 8900 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8901 this = self._parse_disjunction() 8902 if self._match_text_seq("GROUP", "AND", advance=False): 8903 return this 8904 8905 this = self._parse_alias(this) 8906 8907 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8908 return self._parse_ordered(lambda: this) 8909 8910 return this 8911 8912 def _parse_pipe_syntax_aggregate_group_order_by( 8913 self, query: exp.Select, group_by_exists: bool = True 8914 ) -> exp.Select: 8915 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8916 aggregates_or_groups, orders = [], [] 8917 for element in expr: 8918 if isinstance(element, exp.Ordered): 8919 this = element.this 8920 if isinstance(this, exp.Alias): 8921 element.set("this", this.args["alias"]) 8922 orders.append(element) 8923 else: 8924 this = element 8925 aggregates_or_groups.append(this) 8926 8927 if group_by_exists: 8928 query.select(*aggregates_or_groups, copy=False).group_by( 8929 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8930 copy=False, 8931 ) 8932 else: 8933 query.select(*aggregates_or_groups, append=False, copy=False) 8934 8935 if orders: 8936 return query.order_by(*orders, append=False, copy=False) 8937 8938 return query 8939 8940 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8941 self._match_text_seq("AGGREGATE") 8942 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8943 8944 if self._match(TokenType.GROUP_BY) or ( 8945 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8946 ): 8947 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8948 8949 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8950 8951 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8952 first_setop = self.parse_set_operation(this=query) 8953 if not first_setop: 8954 return None 8955 8956 def _parse_and_unwrap_query() -> 
t.Optional[exp.Select]: 8957 expr = self._parse_paren() 8958 return expr.assert_is(exp.Subquery).unnest() if expr else None 8959 8960 first_setop.this.pop() 8961 8962 setops = [ 8963 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8964 *self._parse_csv(_parse_and_unwrap_query), 8965 ] 8966 8967 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8968 with_ = query.args.get("with_") 8969 ctes = with_.pop() if with_ else None 8970 8971 if isinstance(first_setop, exp.Union): 8972 query = query.union(*setops, copy=False, **first_setop.args) 8973 elif isinstance(first_setop, exp.Except): 8974 query = query.except_(*setops, copy=False, **first_setop.args) 8975 else: 8976 query = query.intersect(*setops, copy=False, **first_setop.args) 8977 8978 query.set("with_", ctes) 8979 8980 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8981 8982 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8983 join = self._parse_join() 8984 if not join: 8985 return None 8986 8987 if isinstance(query, exp.Select): 8988 return query.join(join, copy=False) 8989 8990 return query 8991 8992 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8993 pivots = self._parse_pivots() 8994 if not pivots: 8995 return query 8996 8997 from_ = query.args.get("from_") 8998 if from_: 8999 from_.this.set("pivots", pivots) 9000 9001 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9002 9003 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9004 self._match_text_seq("EXTEND") 9005 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9006 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9007 9008 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9009 sample = self._parse_table_sample() 9010 9011 with_ = query.args.get("with_") 9012 if with_: 9013 with_.expressions[-1].this.set("sample", sample) 9014 else: 9015 query.set("sample", sample) 9016 9017 return query 9018 9019 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 9020 if isinstance(query, exp.Subquery): 9021 query = exp.select("*").from_(query, copy=False) 9022 9023 if not query.args.get("from_"): 9024 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9025 9026 while self._match(TokenType.PIPE_GT): 9027 start = self._curr 9028 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 9029 if not parser: 9030 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9031 # keywords, making it tricky to disambiguate them without lookahead. The approach 9032 # here is to try and parse a set operation and if that fails, then try to parse a 9033 # join operator. If that fails as well, then the operator is not supported. 
9034 parsed_query = self._parse_pipe_syntax_set_operator(query) 9035 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9036 if not parsed_query: 9037 self._retreat(start) 9038 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 9039 break 9040 query = parsed_query 9041 else: 9042 query = parser(self, query) 9043 9044 return query 9045 9046 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 9047 vars = self._parse_csv(self._parse_id_var) 9048 if not vars: 9049 return None 9050 9051 return self.expression( 9052 exp.DeclareItem, 9053 this=vars, 9054 kind=self._parse_types(), 9055 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 9056 ) 9057 9058 def _parse_declare(self) -> exp.Declare | exp.Command: 9059 start = self._prev 9060 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9061 9062 if not expressions or self._curr: 9063 return self._parse_as_command(start) 9064 9065 return self.expression(exp.Declare, expressions=expressions) 9066 9067 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9068 exp_class = exp.Cast if strict else exp.TryCast 9069 9070 if exp_class == exp.TryCast: 9071 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9072 9073 return self.expression(exp_class, **kwargs) 9074 9075 def _parse_json_value(self) -> exp.JSONValue: 9076 this = self._parse_bitwise() 9077 self._match(TokenType.COMMA) 9078 path = self._parse_bitwise() 9079 9080 returning = self._match(TokenType.RETURNING) and self._parse_type() 9081 9082 return self.expression( 9083 exp.JSONValue, 9084 this=this, 9085 path=self.dialect.to_json_path(path), 9086 returning=returning, 9087 on_condition=self._parse_on_condition(), 9088 ) 9089 9090 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 9091 def concat_exprs( 9092 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 9093 ) -> exp.Expression: 9094 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9095 concat_exprs = [ 9096 self.expression( 9097 exp.Concat, 9098 expressions=node.expressions, 9099 safe=True, 9100 coalesce=self.dialect.CONCAT_COALESCE, 9101 ) 9102 ] 9103 node.set("expressions", concat_exprs) 9104 return node 9105 if len(exprs) == 1: 9106 return exprs[0] 9107 return self.expression( 9108 exp.Concat, expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE 9109 ) 9110 9111 args = self._parse_csv(self._parse_lambda) 9112 9113 if args: 9114 order = args[-1] if isinstance(args[-1], exp.Order) else None 9115 9116 if order: 9117 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9118 # remove 'expr' from exp.Order and add it back to args 9119 args[-1] = order.this 9120 order.set("this", concat_exprs(order.this, args)) 9121 9122 this = order or concat_exprs(args[0], args) 9123 else: 9124 this = None 9125 9126 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9127 9128 return self.expression(exp.GroupConcat, this=this, separator=separator) 9129 9130 def _parse_initcap(self) -> exp.Initcap: 9131 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9132 9133 # attach dialect's default delimiters 9134 if expr.args.get("expression") is None: 9135 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9136 9137 return expr 9138 9139 def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 9140 while True: 9141 if not self._match(TokenType.L_PAREN): 
9142 break 9143 9144 op = "" 9145 while self._curr and not self._match(TokenType.R_PAREN): 9146 op += self._curr.text 9147 self._advance() 9148 9149 this = self.expression( 9150 exp.Operator, 9151 comments=self._prev_comments, 9152 this=this, 9153 operator=op, 9154 expression=self._parse_bitwise(), 9155 ) 9156 9157 if not self._match(TokenType.OPERATOR): 9158 break 9159 9160 return this
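The tail of the listing above wires up the statement-level parsers (ANALYZE, MERGE, SET, TRUNCATE, COPY, GRANT/REVOKE, DECLARE) and the pipe-syntax transforms. As a minimal sketch, one of these code paths can be exercised through the public API; the MERGE statement below is illustrative input, not taken from the source:

import sqlglot
from sqlglot import exp

# This statement is routed through _parse_merge and _parse_when_matched above.
merge = sqlglot.parse_one(
    "MERGE INTO t USING s ON t.id = s.id "
    "WHEN MATCHED THEN UPDATE SET t.v = s.v "
    "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
)

assert isinstance(merge, exp.Merge)
print(merge.args["whens"])  # the exp.Whens node built by _parse_when_matched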
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1630 def __init__( 1631 self, 1632 error_level: t.Optional[ErrorLevel] = None, 1633 error_message_context: int = 100, 1634 max_errors: int = 3, 1635 dialect: DialectType = None, 1636 ): 1637 from sqlglot.dialects import Dialect 1638 1639 self.error_level = error_level or ErrorLevel.IMMEDIATE 1640 self.error_message_context = error_message_context 1641 self.max_errors = max_errors 1642 self.dialect = Dialect.get_or_raise(dialect) 1643 self.reset()
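Most callers reach the parser through sqlglot.parse or sqlglot.parse_one, but the constructor can also be used directly; a minimal sketch with the options documented above:

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# RAISE collects errors during the parse and raises them together at the end,
# instead of raising immediately on the first one (the IMMEDIATE default);
# max_errors caps how many messages the combined ParseError reports.
parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5, dialect="duckdb")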
1656 def parse( 1657 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1658 ) -> t.List[t.Optional[exp.Expression]]: 1659 """ 1660 Parses a list of tokens and returns a list of syntax trees, one tree 1661 per parsed SQL statement. 1662 1663 Args: 1664 raw_tokens: The list of tokens. 1665 sql: The original SQL string, used to produce helpful debug messages. 1666 1667 Returns: 1668 The list of the produced syntax trees. 1669 """ 1670 return self._parse( 1671 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1672 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
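For illustration, a sketch of the tokenize-then-parse round trip that sqlglot.parse performs internally; passing the original sql string improves the error messages:

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)

trees = Parser().parse(tokens, sql=sql)
for tree in trees:  # one syntax tree per semicolon-separated statement
    print(tree.sql())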
1674 def parse_into( 1675 self, 1676 expression_types: exp.IntoType, 1677 raw_tokens: t.List[Token], 1678 sql: t.Optional[str] = None, 1679 ) -> t.List[t.Optional[exp.Expression]]: 1680 """ 1681 Parses a list of tokens into a given Expression type. If a collection of Expression 1682 types is given instead, this method will try to parse the token list into each one 1683 of them, stopping at the first for which the parsing succeeds. 1684 1685 Args: 1686 expression_types: The expression type(s) to try and parse the token list into. 1687 raw_tokens: The list of tokens. 1688 sql: The original SQL string, used to produce helpful debug messages. 1689 1690 Returns: 1691 The target Expression. 1692 """ 1693 errors = [] 1694 for expression_type in ensure_list(expression_types): 1695 parser = self.EXPRESSION_PARSERS.get(expression_type) 1696 if not parser: 1697 raise TypeError(f"No parser registered for {expression_type}") 1698 1699 try: 1700 return self._parse(parser, raw_tokens, sql) 1701 except ParseError as e: 1702 e.errors[0]["into_expression"] = expression_type 1703 errors.append(e) 1704 1705 raise ParseError( 1706 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1707 errors=merge_errors(errors), 1708 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first one for which parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
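The same machinery is exposed through the public helper via its into argument; a brief sketch parsing a bare table reference into an exp.Table node rather than a full statement:

import sqlglot
from sqlglot import exp

# parse_one(..., into=...) delegates to Parser.parse_into under the hood.
# Passing a tuple such as into=(exp.Select, exp.Table) tries each type in turn.
table = sqlglot.parse_one("db.schema.tbl", into=exp.Table)

assert isinstance(table, exp.Table)
print(table.catalog, table.db, table.name)  # db schema tbl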
1748 def check_errors(self) -> None: 1749 """Logs or raises any found errors, depending on the chosen error level setting.""" 1750 if self.error_level == ErrorLevel.WARN: 1751 for error in self.errors: 1752 logger.error(str(error)) 1753 elif self.error_level == ErrorLevel.RAISE and self.errors: 1754 raise ParseError( 1755 concat_messages(self.errors, self.max_errors), 1756 errors=merge_errors(self.errors), 1757 )
Logs or raises any found errors, depending on the chosen error level setting.
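With ErrorLevel.WARN, errors are collected on the instance and check_errors merely logs them; a sketch using a deliberately incomplete query (the input is illustrative):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT 1 +"  # the right-hand operand is missing
tokens = Tokenizer().tokenize(sql)

parser = Parser(error_level=ErrorLevel.WARN)
trees = parser.parse(tokens, sql=sql)  # logs the error, still returns a tree
print(parser.errors)  # the recorded ParseError instances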
1759 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1760 """ 1761 Appends an error in the list of recorded errors or raises it, depending on the chosen 1762 error level setting. 1763 """ 1764 token = token or self._curr or self._prev or Token.string("") 1765 formatted_sql, start_context, highlight, end_context = highlight_sql( 1766 sql=self.sql, 1767 positions=[(token.start, token.end)], 1768 context_length=self.error_message_context, 1769 ) 1770 formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1771 1772 error = ParseError.new( 1773 formatted_message, 1774 description=message, 1775 line=token.line, 1776 col=token.col, 1777 start_context=start_context, 1778 highlight=highlight, 1779 end_context=end_context, 1780 ) 1781 1782 if self.error_level == ErrorLevel.IMMEDIATE: 1783 raise error 1784 1785 self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
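Under the default ErrorLevel.IMMEDIATE, the first error is raised as a ParseError whose entries carry the position details assembled here; for example:

import sqlglot
from sqlglot.errors import ParseError

try:
    sqlglot.parse_one("SELECT foo FROM (SELECT baz FROM t")  # unbalanced paren
except ParseError as e:
    err = e.errors[0]
    print(err["description"], err["line"], err["col"])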
1787 def expression( 1788 self, 1789 exp_class: t.Type[E], 1790 token: t.Optional[Token] = None, 1791 comments: t.Optional[t.List[str]] = None, 1792 **kwargs, 1793 ) -> E: 1794 """ 1795 Creates a new, validated Expression. 1796 1797 Args: 1798 exp_class: The expression class to instantiate. 1799 comments: An optional list of comments to attach to the expression. 1800 kwargs: The arguments to set for the expression along with their respective values. 1801 1802 Returns: 1803 The target expression. 1804 """ 1805 if token: 1806 instance = exp_class(this=token.text, **kwargs) 1807 instance.update_positions(token) 1808 else: 1809 instance = exp_class(**kwargs) 1810 instance.add_comments(comments) if comments else self._add_comments(instance) 1811 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- token: An optional token; when given, its text is used as the expression's `this` argument and its position metadata is copied onto the new node.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
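expression() is the node factory used by every _parse_* method, and dialect parser subclasses call it the same way in their custom hooks; a minimal standalone sketch:

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()

# Builds and validates an exp.EQ node in one step; a missing mandatory
# argument would be routed through raise_error per the parser's error_level.
eq = parser.expression(exp.EQ, this=exp.column("a"), expression=exp.Literal.number(1))
print(eq.sql())  # a = 1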
1818 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1819 """ 1820 Validates an Expression, making sure that all its mandatory arguments are set. 1821 1822 Args: 1823 expression: The expression to validate. 1824 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1825 1826 Returns: 1827 The validated expression. 1828 """ 1829 if self.error_level != ErrorLevel.IGNORE: 1830 for error_message in expression.error_messages(args): 1831 self.raise_error(error_message) 1832 1833 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
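A sketch of how error_level gates validation: IGNORE returns the node untouched, while the default IMMEDIATE raises on the first missing mandatory argument:

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError
from sqlglot.parser import Parser

incomplete = exp.EQ(this=exp.column("a"))  # mandatory 'expression' arg missing

# IGNORE skips the error_messages check entirely.
Parser(error_level=ErrorLevel.IGNORE).validate_expression(incomplete)

try:
    Parser().validate_expression(incomplete)  # IMMEDIATE by default
except ParseError as e:
    print(e.errors[0]["description"])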
4972 def parse_set_operation( 4973 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4974 ) -> t.Optional[exp.Expression]: 4975 start = self._index 4976 _, side_token, kind_token = self._parse_join_parts() 4977 4978 side = side_token.text if side_token else None 4979 kind = kind_token.text if kind_token else None 4980 4981 if not self._match_set(self.SET_OPERATIONS): 4982 self._retreat(start) 4983 return None 4984 4985 token_type = self._prev.token_type 4986 4987 if token_type == TokenType.UNION: 4988 operation: t.Type[exp.SetOperation] = exp.Union 4989 elif token_type == TokenType.EXCEPT: 4990 operation = exp.Except 4991 else: 4992 operation = exp.Intersect 4993 4994 comments = self._prev.comments 4995 4996 if self._match(TokenType.DISTINCT): 4997 distinct: t.Optional[bool] = True 4998 elif self._match(TokenType.ALL): 4999 distinct = False 5000 else: 5001 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5002 if distinct is None: 5003 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5004 5005 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 5006 "STRICT", "CORRESPONDING" 5007 ) 5008 if self._match_text_seq("CORRESPONDING"): 5009 by_name = True 5010 if not side and not kind: 5011 kind = "INNER" 5012 5013 on_column_list = None 5014 if by_name and self._match_texts(("ON", "BY")): 5015 on_column_list = self._parse_wrapped_csv(self._parse_column) 5016 5017 expression = self._parse_select( 5018 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5019 ) 5020 5021 return self.expression( 5022 operation, 5023 comments=comments, 5024 this=this, 5025 distinct=distinct, 5026 by_name=by_name, 5027 expression=expression, 5028 side=side, 5029 kind=kind, 5030 on=on_column_list, 5031 )
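The effect of parse_set_operation is observable through the public API: a trailing UNION/EXCEPT/INTERSECT becomes the corresponding exp.SetOperation wrapping the left-hand query:

import sqlglot
from sqlglot import exp

union = sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2")

assert isinstance(union, exp.Union)
print(union.args["distinct"])  # False, since ALL was matched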