# sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.expressions import apply_index_offset
from sqlglot.helper import ensure_list, seq_get
from sqlglot.parser_core import ParserCore
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a VarMap from alternating key/value args, or a StarMap for a single `*` arg."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    # Args alternate key, value, key, value, ...
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    """Build a Like node (note: args come in reversed order), wrapped in Escape if a third arg exists."""
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser callable that parses a binary range operator into `expr_type`.

    When `reverse_args` is True, the operands are swapped before building the node.
    """

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log/Ln node, honoring the dialect's base-argument order and LN default."""
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build Hex or LowerHex depending on whether the dialect emits lowercase hex."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder for JSON-extraction nodes that converts the path arg per dialect."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return expression

    return _builder


def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod node, parenthesizing binary operands to preserve precedence."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
def build_pad(args: t.List, is_left: bool = True):
    """Build a Pad node; `is_left` selects LPAD vs RPAD semantics."""
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node, tracking bracket notation for dialects that need it."""
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build ConvertTimezone; with two args, fill in the dialect's default source timezone."""
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True, reverse_args: bool = False):
    """Build a Trim node with LEADING/TRAILING position; optionally swap the two args."""
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if expression and reverse_args:
        this, expression = expression, this

    return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    """Build a Coalesce node, recording whether it originated from NVL/IFNULL spellings."""
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    """Build StrPosition from LOCATE/CHARINDEX-style args (substr first, haystack second)."""
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )
def build_array_append(args: t.List, dialect: Dialect) -> exp.ArrayAppend:
    """
    Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayAppend expression with appropriate null_propagation flag
    """
    return exp.ArrayAppend(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )


def build_array_prepend(args: t.List, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
    Others (DuckDB, PostgreSQL) create a new single-element array instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayPrepend expression with appropriate null_propagation flag
    """
    return exp.ArrayPrepend(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )


def build_array_concat(args: t.List, dialect: Dialect) -> exp.ArrayConcat:
    """
    Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL.
    Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayConcat expression with appropriate null_propagation flag
    """
    return exp.ArrayConcat(
        this=seq_get(args, 0),
        expressions=args[1:],
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
def build_array_remove(args: t.List, dialect: Dialect) -> exp.ArrayRemove:
    """
    Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.

    Some dialects (Snowflake) return NULL when the removal value is NULL.
    Others (DuckDB) may return empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayRemove expression with appropriate null_propagation flag
    """
    return exp.ArrayRemove(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )


class _Parser(type):
    """Metaclass that precomputes the SHOW/SET tries from the parser's keyword tables."""

    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    # Maps SQL function names to builder callables; starts from the generic
    # from_arg_list constructors and overrides names that need special handling.
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_APPEND": build_array_append,
        "ARRAY_CAT": build_array_concat,
        "ARRAY_CONCAT": build_array_concat,
        "ARRAY_PREPEND": build_array_prepend,
        "ARRAY_REMOVE": build_array_remove,
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GENERATE_UUID": lambda args, dialect: exp.Uuid(
            is_string=dialect.UUID_IS_STRING_TYPE or None
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "GREATEST": lambda args, dialect: exp.Greatest(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "LEAST": lambda args, dialect: exp.Least(
            this=seq_get(args, 0),
            expressions=args[1:],
            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
        ),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        ),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
        "VAR_MAP": build_var_map,
    }

    # Keyword tokens that act as functions without parentheses, e.g. CURRENT_DATE
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
        TokenType.LOCALTIME: exp.Localtime,
        TokenType.LOCALTIMESTAMP: exp.Localtimestamp,
        TokenType.CURRENT_ROLE: exp.CurrentRole,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.FILE,
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    # All tokens that may start a data type
    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.BIGNUM,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIME_NS,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.DECFLOAT,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 543 TokenType.BIGINT: TokenType.UBIGINT, 544 TokenType.INT: TokenType.UINT, 545 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 546 TokenType.SMALLINT: TokenType.USMALLINT, 547 TokenType.TINYINT: TokenType.UTINYINT, 548 TokenType.DECIMAL: TokenType.UDECIMAL, 549 TokenType.DOUBLE: TokenType.UDOUBLE, 550 } 551 552 SUBQUERY_PREDICATES = { 553 TokenType.ANY: exp.Any, 554 TokenType.ALL: exp.All, 555 TokenType.EXISTS: exp.Exists, 556 TokenType.SOME: exp.Any, 557 } 558 559 RESERVED_TOKENS = { 560 *Tokenizer.SINGLE_TOKENS.values(), 561 TokenType.SELECT, 562 } - {TokenType.IDENTIFIER} 563 564 DB_CREATABLES = { 565 TokenType.DATABASE, 566 TokenType.DICTIONARY, 567 TokenType.FILE_FORMAT, 568 TokenType.MODEL, 569 TokenType.NAMESPACE, 570 TokenType.SCHEMA, 571 TokenType.SEMANTIC_VIEW, 572 TokenType.SEQUENCE, 573 TokenType.SINK, 574 TokenType.SOURCE, 575 TokenType.STAGE, 576 TokenType.STORAGE_INTEGRATION, 577 TokenType.STREAMLIT, 578 TokenType.TABLE, 579 TokenType.TAG, 580 TokenType.VIEW, 581 TokenType.WAREHOUSE, 582 } 583 584 CREATABLES = { 585 TokenType.COLUMN, 586 TokenType.CONSTRAINT, 587 TokenType.FOREIGN_KEY, 588 TokenType.FUNCTION, 589 TokenType.INDEX, 590 TokenType.PROCEDURE, 591 TokenType.TRIGGER, 592 *DB_CREATABLES, 593 } 594 595 TRIGGER_EVENTS = {TokenType.INSERT, TokenType.UPDATE, TokenType.DELETE, TokenType.TRUNCATE} 596 597 ALTERABLES = { 598 TokenType.INDEX, 599 TokenType.TABLE, 600 TokenType.VIEW, 601 TokenType.SESSION, 602 } 603 604 # Tokens that can represent identifiers 605 ID_VAR_TOKENS = { 606 TokenType.ALL, 607 TokenType.ANALYZE, 608 TokenType.ATTACH, 609 TokenType.VAR, 610 TokenType.ANTI, 611 TokenType.APPLY, 612 TokenType.ASC, 613 TokenType.ASOF, 614 TokenType.AUTO_INCREMENT, 615 TokenType.BEGIN, 616 TokenType.BPCHAR, 617 TokenType.CACHE, 618 TokenType.CASE, 619 TokenType.COLLATE, 620 TokenType.COMMAND, 621 TokenType.COMMENT, 622 TokenType.COMMIT, 623 TokenType.CONSTRAINT, 624 TokenType.COPY, 625 TokenType.CUBE, 626 
TokenType.CURRENT_SCHEMA, 627 TokenType.DEFAULT, 628 TokenType.DELETE, 629 TokenType.DESC, 630 TokenType.DESCRIBE, 631 TokenType.DETACH, 632 TokenType.DICTIONARY, 633 TokenType.DIV, 634 TokenType.END, 635 TokenType.EXECUTE, 636 TokenType.EXPORT, 637 TokenType.ESCAPE, 638 TokenType.FALSE, 639 TokenType.FIRST, 640 TokenType.FILTER, 641 TokenType.FINAL, 642 TokenType.FORMAT, 643 TokenType.FULL, 644 TokenType.GET, 645 TokenType.IDENTIFIER, 646 TokenType.INOUT, 647 TokenType.IS, 648 TokenType.ISNULL, 649 TokenType.INTERVAL, 650 TokenType.KEEP, 651 TokenType.KILL, 652 TokenType.LEFT, 653 TokenType.LIMIT, 654 TokenType.LOAD, 655 TokenType.LOCK, 656 TokenType.MATCH, 657 TokenType.MERGE, 658 TokenType.NATURAL, 659 TokenType.NEXT, 660 TokenType.OFFSET, 661 TokenType.OPERATOR, 662 TokenType.ORDINALITY, 663 TokenType.OVER, 664 TokenType.OVERLAPS, 665 TokenType.OVERWRITE, 666 TokenType.PARTITION, 667 TokenType.PERCENT, 668 TokenType.PIVOT, 669 TokenType.PRAGMA, 670 TokenType.PUT, 671 TokenType.RANGE, 672 TokenType.RECURSIVE, 673 TokenType.REFERENCES, 674 TokenType.REFRESH, 675 TokenType.RENAME, 676 TokenType.REPLACE, 677 TokenType.RIGHT, 678 TokenType.ROLLUP, 679 TokenType.ROW, 680 TokenType.ROWS, 681 TokenType.SEMI, 682 TokenType.SET, 683 TokenType.SETTINGS, 684 TokenType.SHOW, 685 TokenType.TEMPORARY, 686 TokenType.TOP, 687 TokenType.TRUE, 688 TokenType.TRUNCATE, 689 TokenType.UNIQUE, 690 TokenType.UNNEST, 691 TokenType.UNPIVOT, 692 TokenType.UPDATE, 693 TokenType.USE, 694 TokenType.VOLATILE, 695 TokenType.WINDOW, 696 *ALTERABLES, 697 *CREATABLES, 698 *SUBQUERY_PREDICATES, 699 *TYPE_TOKENS, 700 *NO_PAREN_FUNCTIONS, 701 } 702 ID_VAR_TOKENS.remove(TokenType.UNION) 703 704 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 705 TokenType.ANTI, 706 TokenType.ASOF, 707 TokenType.FULL, 708 TokenType.LEFT, 709 TokenType.LOCK, 710 TokenType.NATURAL, 711 TokenType.RIGHT, 712 TokenType.SEMI, 713 TokenType.WINDOW, 714 } 715 716 ALIAS_TOKENS = ID_VAR_TOKENS 717 718 COLON_PLACEHOLDER_TOKENS = 
ID_VAR_TOKENS 719 720 ARRAY_CONSTRUCTORS = { 721 "ARRAY": exp.Array, 722 "LIST": exp.List, 723 } 724 725 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 726 727 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 728 729 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 730 731 FUNC_TOKENS = { 732 TokenType.COLLATE, 733 TokenType.COMMAND, 734 TokenType.CURRENT_DATE, 735 TokenType.CURRENT_DATETIME, 736 TokenType.CURRENT_SCHEMA, 737 TokenType.CURRENT_TIMESTAMP, 738 TokenType.CURRENT_TIME, 739 TokenType.CURRENT_USER, 740 TokenType.CURRENT_CATALOG, 741 TokenType.FILTER, 742 TokenType.FIRST, 743 TokenType.FORMAT, 744 TokenType.GET, 745 TokenType.GLOB, 746 TokenType.IDENTIFIER, 747 TokenType.INDEX, 748 TokenType.ISNULL, 749 TokenType.ILIKE, 750 TokenType.INSERT, 751 TokenType.LIKE, 752 TokenType.LOCALTIME, 753 TokenType.LOCALTIMESTAMP, 754 TokenType.MERGE, 755 TokenType.NEXT, 756 TokenType.OFFSET, 757 TokenType.PRIMARY_KEY, 758 TokenType.RANGE, 759 TokenType.REPLACE, 760 TokenType.RLIKE, 761 TokenType.ROW, 762 TokenType.SESSION_USER, 763 TokenType.UNNEST, 764 TokenType.VAR, 765 TokenType.LEFT, 766 TokenType.RIGHT, 767 TokenType.SEQUENCE, 768 TokenType.DATE, 769 TokenType.DATETIME, 770 TokenType.TABLE, 771 TokenType.TIMESTAMP, 772 TokenType.TIMESTAMPTZ, 773 TokenType.TRUNCATE, 774 TokenType.UTC_DATE, 775 TokenType.UTC_TIME, 776 TokenType.UTC_TIMESTAMP, 777 TokenType.WINDOW, 778 TokenType.XOR, 779 *TYPE_TOKENS, 780 *SUBQUERY_PREDICATES, 781 } 782 783 CONJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expression]]] = { 784 TokenType.AND: exp.And, 785 } 786 787 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 788 TokenType.COLON_EQ: exp.PropertyEQ, 789 } 790 791 DISJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expression]]] = { 792 TokenType.OR: exp.Or, 793 } 794 795 EQUALITY = { 796 TokenType.EQ: exp.EQ, 797 TokenType.NEQ: exp.NEQ, 798 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 799 } 800 801 COMPARISON = { 802 TokenType.GT: exp.GT, 803 
TokenType.GTE: exp.GTE, 804 TokenType.LT: exp.LT, 805 TokenType.LTE: exp.LTE, 806 } 807 808 BITWISE = { 809 TokenType.AMP: exp.BitwiseAnd, 810 TokenType.CARET: exp.BitwiseXor, 811 TokenType.PIPE: exp.BitwiseOr, 812 } 813 814 TERM = { 815 TokenType.DASH: exp.Sub, 816 TokenType.PLUS: exp.Add, 817 TokenType.MOD: exp.Mod, 818 TokenType.COLLATE: exp.Collate, 819 } 820 821 FACTOR = { 822 TokenType.DIV: exp.IntDiv, 823 TokenType.LR_ARROW: exp.Distance, 824 TokenType.SLASH: exp.Div, 825 TokenType.STAR: exp.Mul, 826 } 827 828 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 829 830 TIMES = { 831 TokenType.TIME, 832 TokenType.TIMETZ, 833 } 834 835 TIMESTAMPS = { 836 TokenType.TIMESTAMP, 837 TokenType.TIMESTAMPNTZ, 838 TokenType.TIMESTAMPTZ, 839 TokenType.TIMESTAMPLTZ, 840 *TIMES, 841 } 842 843 SET_OPERATIONS = { 844 TokenType.UNION, 845 TokenType.INTERSECT, 846 TokenType.EXCEPT, 847 } 848 849 JOIN_METHODS = { 850 TokenType.ASOF, 851 TokenType.NATURAL, 852 TokenType.POSITIONAL, 853 } 854 855 JOIN_SIDES = { 856 TokenType.LEFT, 857 TokenType.RIGHT, 858 TokenType.FULL, 859 } 860 861 JOIN_KINDS = { 862 TokenType.ANTI, 863 TokenType.CROSS, 864 TokenType.INNER, 865 TokenType.OUTER, 866 TokenType.SEMI, 867 TokenType.STRAIGHT_JOIN, 868 } 869 870 JOIN_HINTS: t.Set[str] = set() 871 872 LAMBDAS = { 873 TokenType.ARROW: lambda self, expressions: self.expression( 874 exp.Lambda, 875 this=self._replace_lambda( 876 self._parse_disjunction(), 877 expressions, 878 ), 879 expressions=expressions, 880 ), 881 TokenType.FARROW: lambda self, expressions: self.expression( 882 exp.Kwarg, 883 this=exp.var(expressions[0].name), 884 expression=self._parse_disjunction(), 885 ), 886 } 887 888 COLUMN_OPERATORS = { 889 TokenType.DOT: None, 890 TokenType.DOTCOLON: lambda self, this, to: self.expression( 891 exp.JSONCast, 892 this=this, 893 to=to, 894 ), 895 TokenType.DCOLON: lambda self, this, to: self.build_cast( 896 strict=self.STRICT_CAST, this=this, to=to 897 ), 898 TokenType.ARROW: lambda self, 
this, path: self.expression( 899 exp.JSONExtract, 900 this=this, 901 expression=self.dialect.to_json_path(path), 902 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 903 ), 904 TokenType.DARROW: lambda self, this, path: self.expression( 905 exp.JSONExtractScalar, 906 this=this, 907 expression=self.dialect.to_json_path(path), 908 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 909 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 910 ), 911 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 912 exp.JSONBExtract, 913 this=this, 914 expression=path, 915 ), 916 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 917 exp.JSONBExtractScalar, 918 this=this, 919 expression=path, 920 ), 921 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 922 exp.JSONBContains, 923 this=this, 924 expression=key, 925 ), 926 } 927 928 CAST_COLUMN_OPERATORS = { 929 TokenType.DOTCOLON, 930 TokenType.DCOLON, 931 } 932 933 EXPRESSION_PARSERS = { 934 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 935 exp.Column: lambda self: self._parse_column(), 936 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 937 exp.Condition: lambda self: self._parse_disjunction(), 938 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 939 exp.Expression: lambda self: self._parse_expression(), 940 exp.From: lambda self: self._parse_from(joins=True), 941 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 942 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 943 exp.Group: lambda self: self._parse_group(), 944 exp.Having: lambda self: self._parse_having(), 945 exp.Hint: lambda self: self._parse_hint_body(), 946 exp.Identifier: lambda self: self._parse_id_var(), 947 exp.Join: lambda self: self._parse_join(), 948 exp.Lambda: lambda self: self._parse_lambda(), 949 exp.Lateral: lambda self: self._parse_lateral(), 950 exp.Limit: lambda self: 
self._parse_limit(), 951 exp.Offset: lambda self: self._parse_offset(), 952 exp.Order: lambda self: self._parse_order(), 953 exp.Ordered: lambda self: self._parse_ordered(), 954 exp.Properties: lambda self: self._parse_properties(), 955 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 956 exp.Qualify: lambda self: self._parse_qualify(), 957 exp.Returning: lambda self: self._parse_returning(), 958 exp.Select: lambda self: self._parse_select(), 959 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 960 exp.Table: lambda self: self._parse_table_parts(), 961 exp.TableAlias: lambda self: self._parse_table_alias(), 962 exp.Tuple: lambda self: self._parse_value(values=False), 963 exp.Whens: lambda self: self._parse_when_matched(), 964 exp.Where: lambda self: self._parse_where(), 965 exp.Window: lambda self: self._parse_named_window(), 966 exp.With: lambda self: self._parse_with(), 967 "JOIN_TYPE": lambda self: self._parse_join_parts(), 968 } 969 970 STATEMENT_PARSERS = { 971 TokenType.ALTER: lambda self: self._parse_alter(), 972 TokenType.ANALYZE: lambda self: self._parse_analyze(), 973 TokenType.BEGIN: lambda self: self._parse_transaction(), 974 TokenType.CACHE: lambda self: self._parse_cache(), 975 TokenType.COMMENT: lambda self: self._parse_comment(), 976 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 977 TokenType.COPY: lambda self: self._parse_copy(), 978 TokenType.CREATE: lambda self: self._parse_create(), 979 TokenType.DELETE: lambda self: self._parse_delete(), 980 TokenType.DESC: lambda self: self._parse_describe(), 981 TokenType.DESCRIBE: lambda self: self._parse_describe(), 982 TokenType.DROP: lambda self: self._parse_drop(), 983 TokenType.GRANT: lambda self: self._parse_grant(), 984 TokenType.REVOKE: lambda self: self._parse_revoke(), 985 TokenType.INSERT: lambda self: self._parse_insert(), 986 TokenType.KILL: lambda self: self._parse_kill(), 987 TokenType.LOAD: lambda self: self._parse_load(), 988 
        # (continuation of STATEMENT_PARSERS, opened above this chunk)
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    # Prefix (unary) operator tokens mapped to parser callbacks.
    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    # String-literal token types; each callback receives the matched token.
    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            token=token,
            # An optional UESCAPE '<char>' clause may follow the literal
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    # Numeric-ish literal token types (bit/byte/hex strings and plain numbers).
    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token),
        TokenType.BYTE_STRING: lambda self, token: self.expression(
            exp.ByteString,
            token=token,
            is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None,
        ),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            token=token,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, token=token, is_string=False
        ),
    }

    # Primary (leaf) expression parsers: all literals plus NULL/booleans/star/etc.
    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    # Bind-parameter / placeholder tokens (?, @param, :name).
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    # Infix "range"/predicate operators (BETWEEN, IN, LIKE, IS, @>, <@, ...).
    # Callbacks receive the already-parsed left-hand side as `this`.
    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        # a <@ b is canonicalized as b @> a (operands swapped)
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
    }

    # Pipe-syntax (|>) operators mapped to transforms applied to the running query.
    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    # DDL property keywords (CREATE ... WITH / table options) mapped to parsers.
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        # NOTE(review): "Enviroment" [sic] — presumably matches the class name as
        # declared in sqlglot.expressions; verify before "fixing" the spelling.
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    # Column/table constraint keywords mapped to parsers.
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self._parse_check_constraint(),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        # ON UPDATE <fn> -> OnUpdateColumnConstraint, otherwise ON <id> -> OnProperty
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        """Parse a BUCKET(...) / TRUNCATE(...) partition transform, or return None.

        Returns None (after retreating one token) when the keyword is not followed
        by a parenthesis, so the caller can re-parse it as a plain identifier.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        # The keyword that got us here (self._prev) decides which node to build.
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    # ALTER TABLE action keywords mapped to parsers.
    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }
    # Sub-actions of ALTER TABLE ... ALTER (Redshift-style dist/sort keys).
    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    # Constraint keywords that may appear in a schema without a CONSTRAINT name.
    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "BUCKET",
        "TRUNCATE",
    }

    # Function-like keywords that are parsed without a parenthesized arg list.
    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    # Token types that cannot serve as a function name.
    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    # Functions with special argument syntax that need a dedicated parser.
    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }

    # Query-modifier tokens mapped to (modifier key, parsed node) producers.
    # Note FETCH shares the "limit" slot with LIMIT, and FOR/LOCK share "locks".
    QUERY_MODIFIER_PARSERS = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

    # SET statement scope keywords mapped to parsers.
    SET_PARSERS = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    SHOW_PARSERS: t.Dict[str, t.Callable] = {}

    TYPE_LITERAL_PARSERS = {
        exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
    }

    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            # NOTE(review): "UNCOMITTED" is missing an "M" vs the SQL keyword
            # UNCOMMITTED — confirm whether this token is intentional upstream
            # before changing it, since it is matched against user SQL.
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    TRIGGER_TIMING: OPTIONS_TYPE = {
        "INSTEAD": (("OF",),),
        "BEFORE": tuple(),
        "AFTER": tuple(),
    }

    TRIGGER_DEFERRABLE: OPTIONS_TYPE = {
        "NOT": (("DEFERRABLE",),),
        "DEFERRABLE": tuple(),
    }

    # CREATE SEQUENCE option keywords and their allowed follow-up words.
    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}
    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    # Tokens that may introduce an ALTER TABLE ... ADD constraint.
    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {}

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # ANALYZE sub-command keywords mapped to parsers.
    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

    # --- Dialect-behavior switches; subclasses override these per dialect. ---

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE, makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        """Create a parser whose mutable state lives in a shared ParserCore.

        Args:
            error_level: How parse errors are reported (defaults to IMMEDIATE).
            error_message_context: Number of characters of SQL context in errors.
            max_errors: Maximum number of errors included in a raised ParseError.
            dialect: The SQL dialect (name, class or instance) to parse with.
        """
        # Imported here to avoid a circular import at module load time.
        from sqlglot.dialects import Dialect

        self._core = ParserCore(
            error_level=error_level or ErrorLevel.IMMEDIATE,
            error_message_context=error_message_context,
            max_errors=max_errors,
            dialect=Dialect.get_or_raise(dialect),
        )
        # Bind the hot-path helpers directly onto the instance so callers
        # avoid an extra attribute hop through self._core on every token.
        self._match = self._core._match
        self._match_set = self._core._match_set
        self._match_pair = self._core._match_pair
        self._match_texts = self._core._match_texts
        self._match_text_seq = self._core._match_text_seq
        self._advance = self._core._advance
        self._advance_chunk = self._core._advance_chunk
        self._retreat = self._core._retreat
        self._add_comments = self._core._add_comments
        self._is_connected = self._core._is_connected
        self._find_sql = self._core._find_sql
        self.raise_error = self._core.raise_error
        self.validate_expression = self._core.validate_expression
        self._try_parse = self._core._try_parse

    def reset(self) -> None:
        """Reset the underlying ParserCore state."""
        self._core.reset()

    # --- Read-only views (plus a few setters) over the shared ParserCore state. ---

    @property
    def _curr(self) -> t.Any:
        return self._core._curr

    @property
    def _next(self) -> t.Any:
        return self._core._next

    @property
    def _prev(self) -> t.Any:
        return self._core._prev

    @property
    def _prev_comments(self) -> t.Any:
        return self._core._prev_comments

    @property
    def _tokens(self) -> t.List[t.Any]:
        return self._core._tokens

    @property
    def _index(self) -> int:
        return self._core._index

    @property
    def _chunk_index(self) -> int:
        return self._core._chunk_index

    @property
    def errors(self) -> t.List[t.Any]:
        return self._core.errors

    @property
    def error_level(self) -> t.Any:
        return self._core.error_level

    @property
    def error_message_context(self) -> int:
        return self._core.error_message_context

    @property
    def max_errors(self) -> int:
        return self._core.max_errors

    @property
    def dialect(self) -> t.Any:
        return self._core.dialect

    @property
    def sql(self) -> str:
        return self._core.sql

    @sql.setter
    def sql(self, value: str) -> None:
        self._core.sql = value

    @property
    def _chunks(self) -> t.List[t.List[t.Any]]:
        return self._core._chunks

    @_chunks.setter
    def _chunks(self, value: t.List[t.List[t.Any]]) -> None:
        self._core._chunks = value

    @property
    def _pipe_cte_counter(self) -> int:
        return self._core._pipe_cte_counter

    @_pipe_cte_counter.setter
    def _pipe_cte_counter(self, value: int) -> None:
        self._core._pipe_cte_counter = value

    def parse(self, raw_tokens: t.List[Token], sql: str) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )

    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                # Record which target type this failure belongs to before moving on.
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        # Every candidate type failed; surface all accumulated errors at once.
        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]

    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )

    def expression(
        self,
        exp_class: t.Type[E],
        token: t.Optional[Token] = None,
        comments:
t.Optional[t.List[str]] = None, 1881 **kwargs, 1882 ) -> E: 1883 if token: 1884 instance = exp_class(this=token.text, **kwargs) 1885 instance.update_positions(token) 1886 else: 1887 instance = exp_class(**kwargs) 1888 instance.add_comments(comments) if comments else self._add_comments(instance) 1889 return self.validate_expression(instance) 1890 1891 def _parse_batch_statements( 1892 self, 1893 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1894 sep_first_statement: bool = True, 1895 ) -> t.List[t.Optional[exp.Expression]]: 1896 expressions = [] 1897 1898 # Chunkification binds if/while statements with the first statement of the body 1899 if sep_first_statement: 1900 self._match(TokenType.BEGIN) 1901 expressions.append(parse_method(self)) 1902 1903 chunks_length = len(self._chunks) 1904 while self._chunk_index < chunks_length: 1905 self._advance_chunk() 1906 1907 if self._match(TokenType.ELSE, advance=False): 1908 return expressions 1909 1910 if not self._next and self._match(TokenType.END): 1911 expressions.append(exp.EndStatement()) 1912 continue 1913 1914 expressions.append(parse_method(self)) 1915 1916 if self._index < len(self._tokens): 1917 self.raise_error("Invalid expression / Unexpected token") 1918 1919 self.check_errors() 1920 1921 return expressions 1922 1923 def _parse( 1924 self, 1925 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1926 raw_tokens: t.List[Token], 1927 sql: t.Optional[str] = None, 1928 ) -> t.List[t.Optional[exp.Expression]]: 1929 self.reset() 1930 self.sql = sql or "" 1931 1932 total = len(raw_tokens) 1933 chunks: t.List[t.List[Token]] = [[]] 1934 1935 for i, token in enumerate(raw_tokens): 1936 if token.token_type == TokenType.SEMICOLON: 1937 if token.comments: 1938 chunks.append([token]) 1939 1940 if i < total - 1: 1941 chunks.append([]) 1942 else: 1943 chunks[-1].append(token) 1944 1945 self._chunks = chunks 1946 1947 return self._parse_batch_statements(parse_method=parse_method, 
sep_first_statement=False) 1948 1949 def _warn_unsupported(self) -> None: 1950 if len(self._tokens) <= 1: 1951 return 1952 1953 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1954 # interested in emitting a warning for the one being currently processed. 1955 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1956 1957 logger.warning( 1958 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1959 ) 1960 1961 def _parse_command(self) -> exp.Command: 1962 self._warn_unsupported() 1963 return self.expression( 1964 exp.Command, 1965 comments=self._prev_comments, 1966 this=self._prev.text.upper(), 1967 expression=self._parse_string(), 1968 ) 1969 1970 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1971 start = self._prev 1972 exists = self._parse_exists() if allow_exists else None 1973 1974 self._match(TokenType.ON) 1975 1976 materialized = self._match_text_seq("MATERIALIZED") 1977 kind = self._match_set(self.CREATABLES) and self._prev 1978 if not kind: 1979 return self._parse_as_command(start) 1980 1981 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1982 this = self._parse_user_defined_function(kind=kind.token_type) 1983 elif kind.token_type == TokenType.TABLE: 1984 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1985 elif kind.token_type == TokenType.COLUMN: 1986 this = self._parse_column() 1987 else: 1988 this = self._parse_id_var() 1989 1990 self._match(TokenType.IS) 1991 1992 return self.expression( 1993 exp.Comment, 1994 this=this, 1995 kind=kind.text, 1996 expression=self._parse_string(), 1997 exists=exists, 1998 materialized=materialized, 1999 ) 2000 2001 def _parse_to_table( 2002 self, 2003 ) -> exp.ToTableProperty: 2004 table = self._parse_table_parts(schema=True) 2005 return self.expression(exp.ToTableProperty, this=table) 2006 2007 # 
https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2008 def _parse_ttl(self) -> exp.Expression: 2009 def _parse_ttl_action() -> t.Optional[exp.Expression]: 2010 this = self._parse_bitwise() 2011 2012 if self._match_text_seq("DELETE"): 2013 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 2014 if self._match_text_seq("RECOMPRESS"): 2015 return self.expression( 2016 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 2017 ) 2018 if self._match_text_seq("TO", "DISK"): 2019 return self.expression( 2020 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 2021 ) 2022 if self._match_text_seq("TO", "VOLUME"): 2023 return self.expression( 2024 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 2025 ) 2026 2027 return this 2028 2029 expressions = self._parse_csv(_parse_ttl_action) 2030 where = self._parse_where() 2031 group = self._parse_group() 2032 2033 aggregates = None 2034 if group and self._match(TokenType.SET): 2035 aggregates = self._parse_csv(self._parse_set_item) 2036 2037 return self.expression( 2038 exp.MergeTreeTTL, 2039 expressions=expressions, 2040 where=where, 2041 group=group, 2042 aggregates=aggregates, 2043 ) 2044 2045 def _parse_condition(self) -> t.Any: 2046 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2047 2048 def _parse_block(self) -> exp.Block: 2049 return self.expression( 2050 exp.Block, 2051 expressions=self._parse_batch_statements( 2052 parse_method=lambda self: self._parse_statement() 2053 ), 2054 ) 2055 2056 def _parse_whileblock(self) -> exp.WhileBlock: 2057 return self.expression( 2058 exp.WhileBlock, 2059 this=self._parse_condition(), 2060 body=self._parse_block(), 2061 ) 2062 2063 def _parse_statement(self) -> t.Optional[exp.Expression]: 2064 if self._curr is None: 2065 return None 2066 2067 if self._match_set(self.STATEMENT_PARSERS): 2068 comments = self._prev_comments 2069 stmt = 
self.STATEMENT_PARSERS[self._prev.token_type](self) 2070 stmt.add_comments(comments, prepend=True) 2071 return stmt 2072 2073 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2074 return self._parse_command() 2075 2076 if self._match_text_seq("WHILE"): 2077 return self._parse_whileblock() 2078 2079 expression = self._parse_expression() 2080 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2081 return self._parse_query_modifiers(expression) 2082 2083 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2084 start = self._prev 2085 temporary = self._match(TokenType.TEMPORARY) 2086 materialized = self._match_text_seq("MATERIALIZED") 2087 2088 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2089 if not kind: 2090 return self._parse_as_command(start) 2091 2092 concurrently = self._match_text_seq("CONCURRENTLY") 2093 if_exists = exists or self._parse_exists() 2094 2095 if kind == "COLUMN": 2096 this = self._parse_column() 2097 else: 2098 this = self._parse_table_parts( 2099 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 2100 ) 2101 2102 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2103 2104 if self._match(TokenType.L_PAREN, advance=False): 2105 expressions = self._parse_wrapped_csv(self._parse_types) 2106 else: 2107 expressions = None 2108 2109 return self.expression( 2110 exp.Drop, 2111 exists=if_exists, 2112 this=this, 2113 expressions=expressions, 2114 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2115 temporary=temporary, 2116 materialized=materialized, 2117 cascade=self._match_text_seq("CASCADE"), 2118 constraints=self._match_text_seq("CONSTRAINTS"), 2119 purge=self._match_text_seq("PURGE"), 2120 cluster=cluster, 2121 concurrently=concurrently, 2122 ) 2123 2124 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 2125 return ( 2126 self._match_text_seq("IF") 2127 and (not not_ or 
self._match(TokenType.NOT)) 2128 and self._match(TokenType.EXISTS) 2129 ) 2130 2131 def _parse_create(self) -> exp.Create | exp.Command: 2132 # Note: this can't be None because we've matched a statement parser 2133 start = self._prev 2134 2135 replace = ( 2136 start.token_type == TokenType.REPLACE 2137 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2138 or self._match_pair(TokenType.OR, TokenType.ALTER) 2139 ) 2140 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2141 2142 unique = self._match(TokenType.UNIQUE) 2143 2144 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2145 clustered = True 2146 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2147 "COLUMNSTORE" 2148 ): 2149 clustered = False 2150 else: 2151 clustered = None 2152 2153 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2154 self._advance() 2155 2156 properties = None 2157 create_token = self._match_set(self.CREATABLES) and self._prev 2158 2159 if not create_token: 2160 # exp.Properties.Location.POST_CREATE 2161 properties = self._parse_properties() 2162 create_token = self._match_set(self.CREATABLES) and self._prev 2163 2164 if not properties or not create_token: 2165 return self._parse_as_command(start) 2166 2167 create_token_type = t.cast(Token, create_token).token_type 2168 2169 concurrently = self._match_text_seq("CONCURRENTLY") 2170 exists = self._parse_exists(not_=True) 2171 this = None 2172 expression: t.Optional[exp.Expression] = None 2173 indexes = None 2174 no_schema_binding = None 2175 begin = None 2176 clone = None 2177 2178 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2179 nonlocal properties 2180 if properties and temp_props: 2181 properties.expressions.extend(temp_props.expressions) 2182 elif temp_props: 2183 properties = temp_props 2184 2185 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2186 this = self._parse_user_defined_function(kind=create_token_type) 2187 2188 # 
exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2189 extend_props(self._parse_properties()) 2190 2191 expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2192 extend_props(self._parse_properties()) 2193 2194 if not expression: 2195 if self._match(TokenType.COMMAND): 2196 expression = self._parse_as_command(self._prev) 2197 else: 2198 begin = self._match(TokenType.BEGIN) 2199 return_ = self._match_text_seq("RETURN") 2200 2201 if self._match(TokenType.STRING, advance=False): 2202 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2203 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2204 expression = self._parse_string() 2205 extend_props(self._parse_properties()) 2206 else: 2207 expression = ( 2208 self._parse_user_defined_function_expression() 2209 if create_token_type == TokenType.FUNCTION 2210 else self._parse_block() 2211 ) 2212 2213 if return_: 2214 expression = self.expression(exp.Return, this=expression) 2215 elif create_token_type == TokenType.INDEX: 2216 # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) 2217 if not self._match(TokenType.ON): 2218 index = self._parse_id_var() 2219 anonymous = False 2220 else: 2221 index = None 2222 anonymous = True 2223 2224 this = self._parse_index(index=index, anonymous=anonymous) 2225 elif ( 2226 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2227 ) or create_token_type == TokenType.TRIGGER: 2228 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2229 create_token = self._prev 2230 2231 trigger_name = self._parse_id_var() 2232 if not trigger_name: 2233 return self._parse_as_command(start) 2234 2235 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2236 timing = timing_var.this if timing_var else None 2237 if not timing: 2238 return self._parse_as_command(start) 2239 2240 events = self._parse_trigger_events() 2241 if not self._match(TokenType.ON): 2242 self.raise_error("Expected ON in trigger definition") 2243 2244 table = self._parse_table_parts() 2245 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2246 deferrable, initially = self._parse_trigger_deferrable() 2247 referencing = self._parse_trigger_referencing() 2248 for_each = self._parse_trigger_for_each() 2249 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2250 self._parse_disjunction, optional=True 2251 ) 2252 execute = self._parse_trigger_execute() 2253 2254 if execute is None: 2255 return self._parse_as_command(start) 2256 2257 trigger_props = self.expression( 2258 exp.TriggerProperties, 2259 table=table, 2260 timing=timing, 2261 events=events, 2262 execute=execute, 2263 constraint=is_constraint, 2264 referenced_table=referenced_table, 2265 deferrable=deferrable, 2266 initially=initially, 2267 referencing=referencing, 2268 for_each=for_each, 2269 when=when, 2270 ) 2271 2272 this = trigger_name 2273 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2274 elif create_token_type 
in self.DB_CREATABLES: 2275 table_parts = self._parse_table_parts( 2276 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2277 ) 2278 2279 # exp.Properties.Location.POST_NAME 2280 self._match(TokenType.COMMA) 2281 extend_props(self._parse_properties(before=True)) 2282 2283 this = self._parse_schema(this=table_parts) 2284 2285 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2286 extend_props(self._parse_properties()) 2287 2288 has_alias = self._match(TokenType.ALIAS) 2289 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2290 # exp.Properties.Location.POST_ALIAS 2291 extend_props(self._parse_properties()) 2292 2293 if create_token_type == TokenType.SEQUENCE: 2294 expression = self._parse_types() 2295 props = self._parse_properties() 2296 if props: 2297 sequence_props = exp.SequenceProperties() 2298 options = [] 2299 for prop in props: 2300 if isinstance(prop, exp.SequenceProperties): 2301 for arg, value in prop.args.items(): 2302 if arg == "options": 2303 options.extend(value) 2304 else: 2305 sequence_props.set(arg, value) 2306 prop.pop() 2307 2308 if options: 2309 sequence_props.set("options", options) 2310 2311 props.append("expressions", sequence_props) 2312 extend_props(props) 2313 else: 2314 expression = self._parse_ddl_select() 2315 2316 # Some dialects also support using a table as an alias instead of a SELECT. 2317 # Here we fallback to this as an alternative. 
2318 if not expression and has_alias: 2319 expression = self._try_parse(self._parse_table_parts) 2320 2321 if create_token_type == TokenType.TABLE: 2322 # exp.Properties.Location.POST_EXPRESSION 2323 extend_props(self._parse_properties()) 2324 2325 indexes = [] 2326 while True: 2327 index = self._parse_index() 2328 2329 # exp.Properties.Location.POST_INDEX 2330 extend_props(self._parse_properties()) 2331 if not index: 2332 break 2333 else: 2334 self._match(TokenType.COMMA) 2335 indexes.append(index) 2336 elif create_token_type == TokenType.VIEW: 2337 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2338 no_schema_binding = True 2339 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2340 extend_props(self._parse_properties()) 2341 2342 shallow = self._match_text_seq("SHALLOW") 2343 2344 if self._match_texts(self.CLONE_KEYWORDS): 2345 copy = self._prev.text.lower() == "copy" 2346 clone = self.expression( 2347 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2348 ) 2349 2350 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2351 return self._parse_as_command(start) 2352 2353 create_kind_text = create_token.text.upper() 2354 return self.expression( 2355 exp.Create, 2356 this=this, 2357 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2358 replace=replace, 2359 refresh=refresh, 2360 unique=unique, 2361 expression=expression, 2362 exists=exists, 2363 properties=properties, 2364 indexes=indexes, 2365 no_schema_binding=no_schema_binding, 2366 begin=begin, 2367 clone=clone, 2368 concurrently=concurrently, 2369 clustered=clustered, 2370 ) 2371 2372 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2373 seq = exp.SequenceProperties() 2374 2375 options = [] 2376 index = self._index 2377 2378 while self._curr: 2379 self._match(TokenType.COMMA) 2380 if self._match_text_seq("INCREMENT"): 2381 self._match_text_seq("BY") 2382 
self._match_text_seq("=") 2383 seq.set("increment", self._parse_term()) 2384 elif self._match_text_seq("MINVALUE"): 2385 seq.set("minvalue", self._parse_term()) 2386 elif self._match_text_seq("MAXVALUE"): 2387 seq.set("maxvalue", self._parse_term()) 2388 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2389 self._match_text_seq("=") 2390 seq.set("start", self._parse_term()) 2391 elif self._match_text_seq("CACHE"): 2392 # T-SQL allows empty CACHE which is initialized dynamically 2393 seq.set("cache", self._parse_number() or True) 2394 elif self._match_text_seq("OWNED", "BY"): 2395 # "OWNED BY NONE" is the default 2396 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2397 else: 2398 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2399 if opt: 2400 options.append(opt) 2401 else: 2402 break 2403 2404 seq.set("options", options if options else None) 2405 return None if self._index == index else seq 2406 2407 def _parse_trigger_events(self) -> t.List[exp.TriggerEvent]: 2408 events = [] 2409 2410 while True: 2411 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2412 2413 if not event_type: 2414 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2415 2416 columns = ( 2417 self._parse_csv(self._parse_column) 2418 if event_type == "UPDATE" and self._match_text_seq("OF") 2419 else None 2420 ) 2421 2422 events.append(self.expression(exp.TriggerEvent, this=event_type, columns=columns)) 2423 2424 if not self._match(TokenType.OR): 2425 break 2426 2427 return events 2428 2429 def _parse_trigger_deferrable( 2430 self, 2431 ) -> t.Tuple[t.Optional[str], t.Optional[str]]: 2432 deferrable_var = self._parse_var_from_options( 2433 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2434 ) 2435 deferrable = deferrable_var.this if deferrable_var else None 2436 2437 initially = None 2438 if deferrable and self._match_text_seq("INITIALLY"): 2439 
initially = ( 2440 self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2441 ) 2442 2443 return deferrable, initially 2444 2445 def _parse_trigger_referencing_clause(self, keyword: str) -> t.Optional[exp.Expression]: 2446 if not self._match_text_seq(keyword): 2447 return None 2448 if not self._match_text_seq("TABLE"): 2449 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2450 self._match_text_seq("AS") 2451 return self._parse_id_var() 2452 2453 def _parse_trigger_referencing(self) -> t.Optional[exp.TriggerReferencing]: 2454 if not self._match_text_seq("REFERENCING"): 2455 return None 2456 2457 old_alias = None 2458 new_alias = None 2459 2460 while True: 2461 if alias := self._parse_trigger_referencing_clause("OLD"): 2462 if old_alias is not None: 2463 self.raise_error("Duplicate OLD clause in REFERENCING") 2464 old_alias = alias 2465 elif alias := self._parse_trigger_referencing_clause("NEW"): 2466 if new_alias is not None: 2467 self.raise_error("Duplicate NEW clause in REFERENCING") 2468 new_alias = alias 2469 else: 2470 break 2471 2472 if old_alias is None and new_alias is None: 2473 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2474 2475 return self.expression( 2476 exp.TriggerReferencing, 2477 old=old_alias, 2478 new=new_alias, 2479 ) 2480 2481 def _parse_trigger_for_each(self) -> t.Optional[str]: 2482 if not self._match_text_seq("FOR", "EACH"): 2483 return None 2484 2485 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2486 2487 def _parse_trigger_execute(self) -> t.Optional[exp.TriggerExecute]: 2488 if not self._match(TokenType.EXECUTE): 2489 return None 2490 2491 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2492 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2493 2494 func_call = self._parse_function(anonymous=True, optional_parens=False) 2495 return self.expression(exp.TriggerExecute, 
this=func_call) 2496 2497 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2498 # only used for teradata currently 2499 self._match(TokenType.COMMA) 2500 2501 kwargs = { 2502 "no": self._match_text_seq("NO"), 2503 "dual": self._match_text_seq("DUAL"), 2504 "before": self._match_text_seq("BEFORE"), 2505 "default": self._match_text_seq("DEFAULT"), 2506 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2507 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2508 "after": self._match_text_seq("AFTER"), 2509 "minimum": self._match_texts(("MIN", "MINIMUM")), 2510 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2511 } 2512 2513 if self._match_texts(self.PROPERTY_PARSERS): 2514 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2515 try: 2516 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2517 except TypeError: 2518 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2519 2520 return None 2521 2522 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2523 return self._parse_wrapped_csv(self._parse_property) 2524 2525 def _parse_property(self) -> t.Optional[exp.Expression]: 2526 if self._match_texts(self.PROPERTY_PARSERS): 2527 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2528 2529 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2530 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2531 2532 if self._match_text_seq("COMPOUND", "SORTKEY"): 2533 return self._parse_sortkey(compound=True) 2534 2535 if self._match_text_seq("SQL", "SECURITY"): 2536 return self.expression( 2537 exp.SqlSecurityProperty, 2538 this=self._match_texts(("DEFINER", "INVOKER")) and self._prev.text.upper(), 2539 ) 2540 2541 index = self._index 2542 2543 seq_props = self._parse_sequence_properties() 2544 if seq_props: 2545 return seq_props 2546 2547 self._retreat(index) 2548 key = self._parse_column() 2549 2550 if not self._match(TokenType.EQ): 2551 
self._retreat(index) 2552 return None 2553 2554 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2555 if isinstance(key, exp.Column): 2556 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2557 2558 value = self._parse_bitwise() or self._parse_var(any_token=True) 2559 2560 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2561 if isinstance(value, exp.Column): 2562 value = exp.var(value.name) 2563 2564 return self.expression(exp.Property, this=key, value=value) 2565 2566 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2567 if self._match_text_seq("BY"): 2568 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2569 2570 self._match(TokenType.ALIAS) 2571 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2572 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2573 2574 return self.expression( 2575 exp.FileFormatProperty, 2576 this=( 2577 self.expression( 2578 exp.InputOutputFormat, 2579 input_format=input_format, 2580 output_format=output_format, 2581 ) 2582 if input_format or output_format 2583 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2584 ), 2585 hive_format=True, 2586 ) 2587 2588 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2589 field = self._parse_field() 2590 if isinstance(field, exp.Identifier) and not field.quoted: 2591 field = exp.var(field) 2592 2593 return field 2594 2595 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2596 self._match(TokenType.EQ) 2597 self._match(TokenType.ALIAS) 2598 2599 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2600 2601 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2602 properties = [] 2603 while True: 2604 if before: 2605 
prop = self._parse_property_before() 2606 else: 2607 prop = self._parse_property() 2608 if not prop: 2609 break 2610 for p in ensure_list(prop): 2611 properties.append(p) 2612 2613 if properties: 2614 return self.expression(exp.Properties, expressions=properties) 2615 2616 return None 2617 2618 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2619 return self.expression( 2620 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2621 ) 2622 2623 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2624 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2625 security_specifier = self._prev.text.upper() 2626 return self.expression(exp.SecurityProperty, this=security_specifier) 2627 return None 2628 2629 def _parse_settings_property(self) -> exp.SettingsProperty: 2630 return self.expression( 2631 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2632 ) 2633 2634 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2635 if self._index >= 2: 2636 pre_volatile_token = self._tokens[self._index - 2] 2637 else: 2638 pre_volatile_token = None 2639 2640 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2641 return exp.VolatileProperty() 2642 2643 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2644 2645 def _parse_retention_period(self) -> exp.Var: 2646 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2647 number = self._parse_number() 2648 number_str = f"{number} " if number else "" 2649 unit = self._parse_var(any_token=True) 2650 return exp.var(f"{number_str}{unit}") 2651 2652 def _parse_system_versioning_property( 2653 self, with_: bool = False 2654 ) -> exp.WithSystemVersioningProperty: 2655 self._match(TokenType.EQ) 2656 prop = self.expression( 2657 exp.WithSystemVersioningProperty, 2658 on=True, 2659 with_=with_, 2660 ) 2661 2662 if 
self._match_text_seq("OFF"): 2663 prop.set("on", False) 2664 return prop 2665 2666 self._match(TokenType.ON) 2667 if self._match(TokenType.L_PAREN): 2668 while self._curr and not self._match(TokenType.R_PAREN): 2669 if self._match_text_seq("HISTORY_TABLE", "="): 2670 prop.set("this", self._parse_table_parts()) 2671 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2672 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2673 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2674 prop.set("retention_period", self._parse_retention_period()) 2675 2676 self._match(TokenType.COMMA) 2677 2678 return prop 2679 2680 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2681 self._match(TokenType.EQ) 2682 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2683 prop = self.expression(exp.DataDeletionProperty, on=on) 2684 2685 if self._match(TokenType.L_PAREN): 2686 while self._curr and not self._match(TokenType.R_PAREN): 2687 if self._match_text_seq("FILTER_COLUMN", "="): 2688 prop.set("filter_column", self._parse_column()) 2689 elif self._match_text_seq("RETENTION_PERIOD", "="): 2690 prop.set("retention_period", self._parse_retention_period()) 2691 2692 self._match(TokenType.COMMA) 2693 2694 return prop 2695 2696 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2697 kind = "HASH" 2698 expressions: t.Optional[t.List[exp.Expression]] = None 2699 if self._match_text_seq("BY", "HASH"): 2700 expressions = self._parse_wrapped_csv(self._parse_id_var) 2701 elif self._match_text_seq("BY", "RANDOM"): 2702 kind = "RANDOM" 2703 2704 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2705 buckets: t.Optional[exp.Expression] = None 2706 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2707 buckets = self._parse_number() 2708 2709 return self.expression( 2710 exp.DistributedByProperty, 2711 expressions=expressions, 2712 kind=kind, 2713 buckets=buckets, 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the property (or list of properties) following a WITH keyword.

        Each branch below consumes tokens, so the order of the checks is
        significant and must not be rearranged.
        """
        # WITH (SYSTEM_VERSIONING ...): parse the property and close the paren.
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        # A generic parenthesized property list, e.g. WITH (k = v, ...).
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        # WITH [NO] DATA
        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        # Nothing left to look at — bail rather than calling the fallback.
        if not self._next:
            return None

        return self._parse_withisolatedloading()
2770 2771 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2772 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2773 self._match(TokenType.EQ) 2774 2775 user = self._parse_id_var() 2776 self._match(TokenType.PARAMETER) 2777 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2778 2779 if not user or not host: 2780 return None 2781 2782 return exp.DefinerProperty(this=f"{user}@{host}") 2783 2784 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2785 self._match(TokenType.TABLE) 2786 self._match(TokenType.EQ) 2787 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2788 2789 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2790 return self.expression(exp.LogProperty, no=no) 2791 2792 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2793 return self.expression(exp.JournalProperty, **kwargs) 2794 2795 def _parse_checksum(self) -> exp.ChecksumProperty: 2796 self._match(TokenType.EQ) 2797 2798 on = None 2799 if self._match(TokenType.ON): 2800 on = True 2801 elif self._match_text_seq("OFF"): 2802 on = False 2803 2804 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2805 2806 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2807 return self.expression( 2808 exp.Cluster, 2809 expressions=( 2810 self._parse_wrapped_csv(self._parse_ordered) 2811 if wrapped 2812 else self._parse_csv(self._parse_ordered) 2813 ), 2814 ) 2815 2816 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2817 self._match_text_seq("BY") 2818 2819 self._match_l_paren() 2820 expressions = self._parse_csv(self._parse_column) 2821 self._match_r_paren() 2822 2823 if self._match_text_seq("SORTED", "BY"): 2824 self._match_l_paren() 2825 sorted_by = self._parse_csv(self._parse_ordered) 2826 self._match_r_paren() 2827 else: 2828 sorted_by = None 2829 2830 self._match(TokenType.INTO) 2831 buckets = self._parse_number() 2832 
self._match_text_seq("BUCKETS") 2833 2834 return self.expression( 2835 exp.ClusteredByProperty, 2836 expressions=expressions, 2837 sorted_by=sorted_by, 2838 buckets=buckets, 2839 ) 2840 2841 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2842 if not self._match_text_seq("GRANTS"): 2843 self._retreat(self._index - 1) 2844 return None 2845 2846 return self.expression(exp.CopyGrantsProperty) 2847 2848 def _parse_freespace(self) -> exp.FreespaceProperty: 2849 self._match(TokenType.EQ) 2850 return self.expression( 2851 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2852 ) 2853 2854 def _parse_mergeblockratio( 2855 self, no: bool = False, default: bool = False 2856 ) -> exp.MergeBlockRatioProperty: 2857 if self._match(TokenType.EQ): 2858 return self.expression( 2859 exp.MergeBlockRatioProperty, 2860 this=self._parse_number(), 2861 percent=self._match(TokenType.PERCENT), 2862 ) 2863 2864 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2865 2866 def _parse_datablocksize( 2867 self, 2868 default: t.Optional[bool] = None, 2869 minimum: t.Optional[bool] = None, 2870 maximum: t.Optional[bool] = None, 2871 ) -> exp.DataBlocksizeProperty: 2872 self._match(TokenType.EQ) 2873 size = self._parse_number() 2874 2875 units = None 2876 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2877 units = self._prev.text 2878 2879 return self.expression( 2880 exp.DataBlocksizeProperty, 2881 size=size, 2882 units=units, 2883 default=default, 2884 minimum=minimum, 2885 maximum=maximum, 2886 ) 2887 2888 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2889 self._match(TokenType.EQ) 2890 always = self._match_text_seq("ALWAYS") 2891 manual = self._match_text_seq("MANUAL") 2892 never = self._match_text_seq("NEVER") 2893 default = self._match_text_seq("DEFAULT") 2894 2895 autotemp = None 2896 if self._match_text_seq("AUTOTEMP"): 2897 autotemp = self._parse_schema() 2898 2899 return 
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING modifier: [kind [name]] [FOR | IN] [lock type] [OVERRIDE].

        Every stage below consumes tokens on a successful match, so the chains
        must stay in this exact order. Any component may be absent (None).
        """
        # Optional object kind being locked.
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a name; ROW (and no kind) do not.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        # Optional FOR / IN connective keyword.
        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        # Optional lock type; EXCL is normalized to EXCLUSIVE.
        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS <n>, REMAINDER <n>) — PostgreSQL-style, presumably; raises
        if none of the three forms is found.
        """

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are unbounded range markers, kept as bare vars.
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            # List partitioning: IN (v1, v2, ...)
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            # Range partitioning: FROM (...) TO (...)
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # Hash partitioning: WITH (MODULUS m, REMAINDER r)
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
or FOR VALUES clause.") 3026 3027 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 3028 3029 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3030 self._match(TokenType.EQ) 3031 return self.expression( 3032 exp.PartitionedByProperty, 3033 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 3034 ) 3035 3036 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3037 if self._match_text_seq("AND", "STATISTICS"): 3038 statistics = True 3039 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3040 statistics = False 3041 else: 3042 statistics = None 3043 3044 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 3045 3046 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3047 if self._match_text_seq("SQL"): 3048 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 3049 return None 3050 3051 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3052 if self._match_text_seq("SQL", "DATA"): 3053 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 3054 return None 3055 3056 def _parse_no_property(self) -> t.Optional[exp.Expression]: 3057 if self._match_text_seq("PRIMARY", "INDEX"): 3058 return exp.NoPrimaryIndexProperty() 3059 if self._match_text_seq("SQL"): 3060 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 3061 return None 3062 3063 def _parse_on_property(self) -> t.Optional[exp.Expression]: 3064 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3065 return exp.OnCommitProperty() 3066 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3067 return exp.OnCommitProperty(delete=True) 3068 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 3069 3070 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3071 if self._match_text_seq("SQL", "DATA"): 3072 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 3073 return None 3074 3075 def _parse_distkey(self) -> exp.DistKeyProperty: 3076 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 3077 3078 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 3079 table = self._parse_table(schema=True) 3080 3081 options = [] 3082 while self._match_texts(("INCLUDING", "EXCLUDING")): 3083 this = self._prev.text.upper() 3084 3085 id_var = self._parse_id_var() 3086 if not id_var: 3087 return None 3088 3089 options.append( 3090 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 3091 ) 3092 3093 return self.expression(exp.LikeProperty, this=table, expressions=options) 3094 3095 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3096 return self.expression( 3097 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 3098 ) 3099 3100 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3101 self._match(TokenType.EQ) 3102 return self.expression( 3103 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 3104 ) 3105 3106 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3107 self._match_text_seq("WITH", "CONNECTION") 3108 return self.expression( 3109 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 3110 ) 3111 3112 def _parse_returns(self) -> exp.ReturnsProperty: 3113 value: t.Optional[exp.Expression] 3114 null = None 3115 is_table = self._match(TokenType.TABLE) 3116 3117 if is_table: 3118 if self._match(TokenType.LT): 3119 value = self.expression( 3120 exp.Schema, 3121 this="TABLE", 3122 expressions=self._parse_csv(self._parse_struct_types), 3123 ) 3124 if not self._match(TokenType.GT): 3125 self.raise_error("Expecting >") 3126 else: 3127 value = self._parse_schema(exp.var("TABLE")) 3128 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3129 null = True 3130 value = None 3131 else: 3132 
    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement: optional creatable kind, style keyword,
        FORMAT property, then either a nested statement or a table reference,
        followed by properties, a partition clause and an optional AS JSON.
        """
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A DOT right after the "style" token means it was really the first
            # part of a dotted name — undo both matches and reparse as a table.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
            as_json=self._match_text_seq("AS", "JSON"),
        )
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement (the INSERT keyword was already consumed).

        Handles INSERT ... DIRECTORY, multitable INSERT FIRST/ALL, INSERT OR
        <alternative>, and function targets. NOTE: the keyword arguments in the
        final self.expression call are evaluated left to right and each one
        consumes tokens, so their order encodes the expected clause order.
        """
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT ... DIRECTORY 'path': the target is a directory, not a table.
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            # INSERT FIRST/ALL starts a multitable insert — delegate entirely.
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_function() if is_function else self._parse_insert_table()

        returning = self._parse_returning()  # TSQL allows RETURNING before source

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_disjunction(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            default=self._match_text_seq("DEFAULT", "VALUES"),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )
t.Optional[exp.Returning]: 3313 if not self._match(TokenType.RETURNING): 3314 return None 3315 return self.expression( 3316 exp.Returning, 3317 expressions=self._parse_csv(self._parse_expression), 3318 into=self._match(TokenType.INTO) and self._parse_table_part(), 3319 ) 3320 3321 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3322 if not self._match(TokenType.FORMAT): 3323 return None 3324 return self._parse_row_format() 3325 3326 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3327 index = self._index 3328 with_ = with_ or self._match_text_seq("WITH") 3329 3330 if not self._match(TokenType.SERDE_PROPERTIES): 3331 self._retreat(index) 3332 return None 3333 return self.expression( 3334 exp.SerdeProperties, 3335 expressions=self._parse_wrapped_properties(), 3336 with_=with_, 3337 ) 3338 3339 def _parse_row_format( 3340 self, match_row: bool = False 3341 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3342 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3343 return None 3344 3345 if self._match_text_seq("SERDE"): 3346 this = self._parse_string() 3347 3348 serde_properties = self._parse_serde_properties() 3349 3350 return self.expression( 3351 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3352 ) 3353 3354 self._match_text_seq("DELIMITED") 3355 3356 kwargs = {} 3357 3358 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3359 kwargs["fields"] = self._parse_string() 3360 if self._match_text_seq("ESCAPED", "BY"): 3361 kwargs["escaped"] = self._parse_string() 3362 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3363 kwargs["collection_items"] = self._parse_string() 3364 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3365 kwargs["map_keys"] = self._parse_string() 3366 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3367 kwargs["lines"] = self._parse_string() 
    def _parse_delete(self) -> exp.Delete:
        """Parse a DELETE statement (the DELETE keyword was already consumed).

        NOTE: the keyword arguments in the self.expression call are evaluated
        left to right and each consumes tokens, so their order matters.
        """
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        # Some dialects allow RETURNING before the FROM clause.
        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING)
            and self._parse_csv(lambda: self._parse_table(joins=True)),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            order=self._parse_order(),
            limit=self._parse_limit(),
        )
self._parse_csv(self._parse_equality) 3422 elif self._match(TokenType.RETURNING, advance=False): 3423 kwargs["returning"] = self._parse_returning() 3424 elif self._match(TokenType.FROM, advance=False): 3425 from_ = self._parse_from(joins=True) 3426 table = from_.this if from_ else None 3427 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3428 table.set("joins", list(self._parse_joins()) or None) 3429 3430 kwargs["from_"] = from_ 3431 elif self._match(TokenType.WHERE, advance=False): 3432 kwargs["where"] = self._parse_where() 3433 elif self._match(TokenType.ORDER_BY, advance=False): 3434 kwargs["order"] = self._parse_order() 3435 elif self._match(TokenType.LIMIT, advance=False): 3436 kwargs["limit"] = self._parse_limit() 3437 else: 3438 break 3439 3440 return self.expression(exp.Update, **kwargs) 3441 3442 def _parse_use(self) -> exp.Use: 3443 return self.expression( 3444 exp.Use, 3445 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3446 this=self._parse_table(schema=False), 3447 ) 3448 3449 def _parse_uncache(self) -> exp.Uncache: 3450 if not self._match(TokenType.TABLE): 3451 self.raise_error("Expecting TABLE after UNCACHE") 3452 3453 return self.expression( 3454 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3455 ) 3456 3457 def _parse_cache(self) -> exp.Cache: 3458 lazy = self._match_text_seq("LAZY") 3459 self._match(TokenType.TABLE) 3460 table = self._parse_table(schema=True) 3461 3462 options = [] 3463 if self._match_text_seq("OPTIONS"): 3464 self._match_l_paren() 3465 k = self._parse_string() 3466 self._match(TokenType.EQ) 3467 v = self._parse_string() 3468 options = [k, v] 3469 self._match_r_paren() 3470 3471 self._match(TokenType.ALIAS) 3472 return self.expression( 3473 exp.Cache, 3474 this=table, 3475 lazy=lazy, 3476 options=options, 3477 expression=self._parse_select(nested=True), 3478 ) 3479 3480 def _parse_partition(self) -> t.Optional[exp.Partition]: 3481 if not 
self._match_texts(self.PARTITION_KEYWORDS): 3482 return None 3483 3484 return self.expression( 3485 exp.Partition, 3486 subpartition=self._prev.text.upper() == "SUBPARTITION", 3487 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3488 ) 3489 3490 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3491 def _parse_value_expression() -> t.Optional[exp.Expression]: 3492 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3493 return exp.var(self._prev.text.upper()) 3494 return self._parse_expression() 3495 3496 if self._match(TokenType.L_PAREN): 3497 expressions = self._parse_csv(_parse_value_expression) 3498 self._match_r_paren() 3499 return self.expression(exp.Tuple, expressions=expressions) 3500 3501 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3502 expression = self._parse_expression() 3503 if expression: 3504 return self.expression(exp.Tuple, expressions=[expression]) 3505 return None 3506 3507 def _parse_projections( 3508 self, 3509 ) -> t.Tuple[t.List[exp.Expression], t.Optional[t.List[exp.Expression]]]: 3510 return self._parse_expressions(), None 3511 3512 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3513 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3514 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3515 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3516 ) 3517 elif self._match(TokenType.FROM): 3518 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3519 # Support parentheses for duckdb FROM-first syntax 3520 select = self._parse_select(from_=from_) 3521 if select: 3522 if not select.args.get("from_"): 3523 select.set("from_", from_) 3524 this = select 3525 else: 3526 this = exp.select("*").from_(t.cast(exp.From, from_)) 3527 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3528 else: 3529 this = ( 3530 self._parse_table(consume_pipe=True) 3531 if table 3532 
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, optionally continuing into pipe (|>) syntax.

        Args:
            nested: forwarded to _parse_select_query.
            table: when True, the pipe-syntax result is wrapped in a subquery.
            parse_subquery_alias: forwarded to _parse_select_query.
            parse_set_operation: forwarded to _parse_select_query.
            consume_pipe: whether a following |> token should be parsed here.
            from_: a leading FROM already parsed by the caller (duckdb
                FROM-first syntax); used as the base query if none was parsed.
        """
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            # A bare leading FROM becomes SELECT * FROM ... as the pipe input.
            if not query and from_:
                query = exp.select("*").from_(from_)

            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query
self._parse_from(joins=True, consume_pipe=True) 3600 if self._match(TokenType.FROM, advance=False) 3601 else None 3602 ) 3603 3604 if self._match(TokenType.SELECT): 3605 comments = self._prev_comments 3606 3607 hint = self._parse_hint() 3608 3609 if self._next and not self._next.token_type == TokenType.DOT: 3610 all_ = self._match(TokenType.ALL) 3611 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3612 else: 3613 all_, matched_distinct = None, False 3614 3615 kind = ( 3616 self._prev.text.upper() 3617 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3618 else None 3619 ) 3620 3621 distinct: t.Optional[exp.Expression] = ( 3622 self.expression( 3623 exp.Distinct, 3624 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3625 ) 3626 if matched_distinct 3627 else None 3628 ) 3629 3630 if all_ and distinct: 3631 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3632 3633 operation_modifiers = [] 3634 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3635 operation_modifiers.append(exp.var(self._prev.text.upper())) 3636 3637 limit = self._parse_limit(top=True) 3638 projections, exclude = self._parse_projections() 3639 3640 this = self.expression( 3641 exp.Select, 3642 kind=kind, 3643 hint=hint, 3644 distinct=distinct, 3645 expressions=projections, 3646 limit=limit, 3647 exclude=exclude, 3648 operation_modifiers=operation_modifiers or None, 3649 ) 3650 this.comments = comments 3651 3652 into = self._parse_into() 3653 if into: 3654 this.set("into", into) 3655 3656 if not from_: 3657 from_ = self._parse_from() 3658 3659 if from_: 3660 this.set("from_", from_) 3661 3662 this = self._parse_query_modifiers(this) 3663 elif (table or nested) and self._match(TokenType.L_PAREN): 3664 comments = self._prev_comments 3665 this = self._parse_wrapped_select(table=table) 3666 3667 if this: 3668 this.add_comments(comments, prepend=True) 3669 3670 # We return early here so that the UNION isn't 
attached to the subquery by the 3671 # following call to _parse_set_operations, but instead becomes the parent node 3672 self._match_r_paren() 3673 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3674 elif self._match(TokenType.VALUES, advance=False): 3675 this = self._parse_derived_table_values() 3676 elif from_: 3677 this = exp.select("*").from_(from_.this, copy=False) 3678 elif self._match(TokenType.SUMMARIZE): 3679 table = self._match(TokenType.TABLE) 3680 this = self._parse_select() or self._parse_string() or self._parse_table() 3681 return self.expression(exp.Summarize, this=this, table=table) 3682 elif self._match(TokenType.DESCRIBE): 3683 this = self._parse_describe() 3684 else: 3685 this = None 3686 3687 return self._parse_set_operations(this) if parse_set_operation else this 3688 3689 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3690 self._match_text_seq("SEARCH") 3691 3692 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3693 3694 if not kind: 3695 return None 3696 3697 self._match_text_seq("FIRST", "BY") 3698 3699 return self.expression( 3700 exp.RecursiveWithSearch, 3701 kind=kind, 3702 this=self._parse_id_var(), 3703 expression=self._match_text_seq("SET") and self._parse_id_var(), 3704 using=self._match_text_seq("USING") and self._parse_id_var(), 3705 ) 3706 3707 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3708 if not skip_with_token and not self._match(TokenType.WITH): 3709 return None 3710 3711 comments = self._prev_comments 3712 recursive = self._match(TokenType.RECURSIVE) 3713 3714 last_comments = None 3715 expressions = [] 3716 while True: 3717 cte = self._parse_cte() 3718 if isinstance(cte, exp.CTE): 3719 expressions.append(cte) 3720 if last_comments: 3721 cte.add_comments(last_comments) 3722 3723 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3724 break 3725 else: 3726 self._match(TokenType.WITH) 

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive or None,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse a single CTE: alias [USING KEY (...)] AS (statement)."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        key_expressions = (
            self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
        )

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            key_expressions=key_expressions,
            comments=comments,
        )

        # A bare VALUES CTE is normalized into SELECT * FROM (VALUES ...) so downstream
        # consumers always see a selectable query
        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        """Parse an optional table alias with an optional parenthesized column list."""
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, consuming trailing pivots/alias/sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (a joined table that is really a column reference
        into a previously-seen table) into explicit exp.Unnest nodes."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    @t.overload
    def _parse_query_modifiers(self, this: E) -> E: ...

    @t.overload
    def _parse_query_modifiers(self, this: None) -> None: ...

    def _parse_query_modifiers(self, this):
        """Attach trailing query modifiers (joins, laterals, and the registered
        QUERY_MODIFIER_PARSERS clauses) to `this`, rejecting duplicate clauses."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT that carried an OFFSET (and LIMIT BY expressions) is
                            # split into separate Limit/Offset nodes on the query
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and wrap their SQL text in a single Hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook allowing dialects to customize function-call parsing inside hints."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a hint's contents, falling back to raw string capture on failure."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in the comment attached to a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse a SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] target clause."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause into an exp.From node."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES entry of MATCH_RECOGNIZE, with optional FINAL/RUNNING prefix."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause into an exp.MatchRecognize node."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is captured as raw SQL text between balanced parentheses
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL [VIEW] [OUTER], CROSS APPLY, or OUTER APPLY constructs."""
        # cross_apply: True for CROSS APPLY, False for OUTER APPLY, None for LATERAL
        cross_apply: t.Optional[bool] = None
        if self._match_pair(TokenType.CROSS, TokenType.APPLY):
            cross_apply = True
        elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # No subquery followed; the lateral target is an unnest, function call, or
            # (possibly dotted) identifier
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_stream(self) -> t.Optional[exp.Stream]:
        """Parse a STREAM <table> expression, retreating if no table follows."""
        index = self._index
        if self._match_text_seq("STREAM"):
            this = self._try_parse(self._parse_table)
            if this:
                return self.expression(exp.Stream, this=this)

        self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens that prefix a JOIN, each possibly None."""
        return (
            self._prev if self._match_set(self.JOIN_METHODS) else None,
            self._prev if self._match_set(self.JOIN_SIDES) else None,
            self._prev if self._match_set(self.JOIN_KINDS) else None,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a JOIN ... USING (...) clause."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            # Columns parsed here are really identifiers, so unwrap them
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN, including comma cross joins and APPLY variants."""
        if self._match(TokenType.COMMA):
            # A comma acts as a cross join between table factors
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Handle nested joins: the ON/USING may belong to this join even after
            # further joins were parsed for the right-hand table
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            # Dialects with ADD_JOIN_ON_TRUE normalize a bare join to ON TRUE
            kwargs["on"] = exp.true()

        if directed:
            kwargs["directed"] = directed

        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by an operator class name."""
        this = self._parse_disjunction()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the parameter clauses of an index definition (USING, columns, INCLUDE,
        PARTITION BY, WITH storage options, TABLESPACE, WHERE, ON)."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; with `index`/`anonymous` given, only the target
        table and parameters are parsed."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse table hints following a table: WITH (...) style or index-hint style."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dot-separated component of a (possibly qualified) table name."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a possibly qualified name (catalog.db.table) into an exp.Table node."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like factor: stream, lateral, unnest, VALUES, subquery,
        bracketed table, ROWS FROM, or a plain (qualified) table name."""
        stream = self._parse_stream()
        if stream:
            return stream

        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from_tables = (
            self._parse_wrapped_csv(self._parse_table)
            if self._match_text_seq("ROWS", "FROM")
            else None
        )
        rows_from = (
            self.expression(exp.Table, rows_from=rows_from_tables) if rows_from_tables else None
        )

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        if self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed",
self._parse_table_parts()) 4493 elif self._match_text_seq("NOT", "INDEXED"): 4494 this.set("indexed", False) 4495 4496 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4497 return self.expression( 4498 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4499 ) 4500 4501 this.set("hints", self._parse_table_hints()) 4502 4503 if not this.args.get("pivots"): 4504 this.set("pivots", self._parse_pivots()) 4505 4506 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4507 this.set("sample", self._parse_table_sample()) 4508 4509 if not self.dialect.ALIAS_POST_VERSION: 4510 this.set("version", self._parse_version()) 4511 4512 if joins: 4513 for join in self._parse_joins(): 4514 this.append("joins", join) 4515 4516 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4517 this.set("ordinality", True) 4518 this.set("alias", self._parse_table_alias()) 4519 4520 return this 4521 4522 def _parse_version(self) -> t.Optional[exp.Version]: 4523 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4524 this = "TIMESTAMP" 4525 elif self._match(TokenType.VERSION_SNAPSHOT): 4526 this = "VERSION" 4527 else: 4528 return None 4529 4530 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4531 kind = self._prev.text.upper() 4532 start = self._parse_bitwise() 4533 self._match_texts(("TO", "AND")) 4534 end = self._parse_bitwise() 4535 expression: t.Optional[exp.Expression] = self.expression( 4536 exp.Tuple, expressions=[start, end] 4537 ) 4538 elif self._match_text_seq("CONTAINED", "IN"): 4539 kind = "CONTAINED IN" 4540 expression = self.expression( 4541 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4542 ) 4543 elif self._match(TokenType.ALL): 4544 kind = "ALL" 4545 expression = None 4546 else: 4547 self._match_text_seq("AS", "OF") 4548 kind = "AS OF" 4549 expression = self._parse_type() 4550 4551 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4552 4553 def _parse_historical_data(self) -> 
t.Optional[exp.HistoricalData]: 4554 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4555 index = self._index 4556 historical_data = None 4557 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4558 this = self._prev.text.upper() 4559 kind = ( 4560 self._match(TokenType.L_PAREN) 4561 and self._match_texts(self.HISTORICAL_DATA_KIND) 4562 and self._prev.text.upper() 4563 ) 4564 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4565 4566 if expression: 4567 self._match_r_paren() 4568 historical_data = self.expression( 4569 exp.HistoricalData, this=this, kind=kind, expression=expression 4570 ) 4571 else: 4572 self._retreat(index) 4573 4574 return historical_data 4575 4576 def _parse_changes(self) -> t.Optional[exp.Changes]: 4577 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4578 return None 4579 4580 information = self._parse_var(any_token=True) 4581 self._match_r_paren() 4582 4583 return self.expression( 4584 exp.Changes, 4585 information=information, 4586 at_before=self._parse_historical_data(), 4587 end=self._parse_historical_data(), 4588 ) 4589 4590 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4591 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4592 return None 4593 4594 self._advance() 4595 4596 expressions = self._parse_wrapped_csv(self._parse_equality) 4597 offset: t.Union[bool, exp.Expression] = self._match_pair( 4598 TokenType.WITH, TokenType.ORDINALITY 4599 ) 4600 4601 alias = self._parse_table_alias() if with_alias else None 4602 4603 if alias: 4604 if self.dialect.UNNEST_COLUMN_ONLY: 4605 if alias.args.get("columns"): 4606 self.raise_error("Unexpected extra column alias in unnest.") 4607 4608 alias.set("columns", [alias.this]) 4609 alias.set("this", None) 4610 4611 columns = alias.args.get("columns") or [] 4612 if offset and len(expressions) < len(columns): 4613 offset = columns.pop() 4614 4615 if not offset and 
self._match_pair(TokenType.WITH, TokenType.OFFSET): 4616 self._match(TokenType.ALIAS) 4617 offset = self._parse_id_var( 4618 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4619 ) or exp.to_identifier("offset") 4620 4621 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4622 4623 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4624 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4625 if not is_derived and not ( 4626 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4627 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4628 ): 4629 return None 4630 4631 expressions = self._parse_csv(self._parse_value) 4632 alias = self._parse_table_alias() 4633 4634 if is_derived: 4635 self._match_r_paren() 4636 4637 return self.expression( 4638 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4639 ) 4640 4641 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4642 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4643 as_modifier and self._match_text_seq("USING", "SAMPLE") 4644 ): 4645 return None 4646 4647 bucket_numerator = None 4648 bucket_denominator = None 4649 bucket_field = None 4650 percent = None 4651 size = None 4652 seed = None 4653 4654 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4655 matched_l_paren = self._match(TokenType.L_PAREN) 4656 4657 if self.TABLESAMPLE_CSV: 4658 num = None 4659 expressions = self._parse_csv(self._parse_primary) 4660 else: 4661 expressions = None 4662 num = ( 4663 self._parse_factor() 4664 if self._match(TokenType.NUMBER, advance=False) 4665 else self._parse_primary() or self._parse_placeholder() 4666 ) 4667 4668 if self._match_text_seq("BUCKET"): 4669 bucket_numerator = self._parse_number() 4670 self._match_text_seq("OUT", "OF") 4671 bucket_denominator = bucket_denominator = self._parse_number() 4672 self._match(TokenType.ON) 4673 bucket_field 
= self._parse_field() 4674 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4675 percent = num 4676 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4677 size = num 4678 else: 4679 percent = num 4680 4681 if matched_l_paren: 4682 self._match_r_paren() 4683 4684 if self._match(TokenType.L_PAREN): 4685 method = self._parse_var(upper=True) 4686 seed = self._match(TokenType.COMMA) and self._parse_number() 4687 self._match_r_paren() 4688 elif self._match_texts(("SEED", "REPEATABLE")): 4689 seed = self._parse_wrapped(self._parse_number) 4690 4691 if not method and self.DEFAULT_SAMPLING_METHOD: 4692 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4693 4694 return self.expression( 4695 exp.TableSample, 4696 expressions=expressions, 4697 method=method, 4698 bucket_numerator=bucket_numerator, 4699 bucket_denominator=bucket_denominator, 4700 bucket_field=bucket_field, 4701 percent=percent, 4702 size=size, 4703 seed=seed, 4704 ) 4705 4706 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4707 return list(iter(self._parse_pivot, None)) or None 4708 4709 def _parse_joins(self) -> t.Iterator[exp.Join]: 4710 return iter(self._parse_join, None) 4711 4712 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4713 if not self._match(TokenType.INTO): 4714 return None 4715 4716 return self.expression( 4717 exp.UnpivotColumns, 4718 this=self._match_text_seq("NAME") and self._parse_column(), 4719 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4720 ) 4721 4722 # https://duckdb.org/docs/sql/statements/pivot 4723 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4724 def _parse_on() -> t.Optional[exp.Expression]: 4725 this = self._parse_bitwise() 4726 4727 if self._match(TokenType.IN): 4728 # PIVOT ... ON col IN (row_val1, row_val2) 4729 return self._parse_in(this) 4730 if self._match(TokenType.ALIAS, advance=False): 4731 # UNPIVOT ... 
ON (col1, col2, col3) AS row_val 4732 return self._parse_alias(this) 4733 4734 return this 4735 4736 this = self._parse_table() 4737 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4738 into = self._parse_unpivot_columns() 4739 using = self._match(TokenType.USING) and self._parse_csv( 4740 lambda: self._parse_alias(self._parse_column()) 4741 ) 4742 group = self._parse_group() 4743 4744 return self.expression( 4745 exp.Pivot, 4746 this=this, 4747 expressions=expressions, 4748 using=using, 4749 group=group, 4750 unpivot=is_unpivot, 4751 into=into, 4752 ) 4753 4754 def _parse_pivot_in(self) -> exp.In: 4755 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4756 this = self._parse_select_or_expression() 4757 4758 self._match(TokenType.ALIAS) 4759 alias = self._parse_bitwise() 4760 if alias: 4761 if isinstance(alias, exp.Column) and not alias.db: 4762 alias = alias.this 4763 return self.expression(exp.PivotAlias, this=this, alias=alias) 4764 4765 return this 4766 4767 value = self._parse_column() 4768 4769 if not self._match(TokenType.IN): 4770 self.raise_error("Expecting IN") 4771 4772 if self._match(TokenType.L_PAREN): 4773 if self._match(TokenType.ANY): 4774 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4775 else: 4776 exprs = self._parse_csv(_parse_aliased_expression) 4777 self._match_r_paren() 4778 return self.expression(exp.In, this=value, expressions=exprs) 4779 4780 return self.expression(exp.In, this=value, field=self._parse_id_var()) 4781 4782 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4783 func = self._parse_function() 4784 if not func: 4785 if self._prev and self._prev.token_type == TokenType.COMMA: 4786 return None 4787 self.raise_error("Expecting an aggregation function in PIVOT") 4788 4789 return self._parse_alias(func) 4790 4791 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4792 index = self._index 4793 include_nulls = None 4794 4795 if 
self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            # Not actually a PIVOT clause; undo token consumption
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        # One FOR field per `<col> IN (...)` group; stop at the first non-match
        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        # Only the last PIVOT/UNPIVOT in a chain may carry a table alias
        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the PIVOT aggregations (unaliased ones are dropped)."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a PREWHERE clause (ClickHouse-style); None when absent."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause; None when absent (unless the token was pre-consumed)."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, CUBE, ROLLUP, GROUPING SETS and TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        # GROUP BY immediately followed by another query modifier means an empty grouping list
        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_disjunction()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
                key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
                elements[key].append(cube_or_rollup)
            elif grouping_sets := self._parse_grouping_sets():
                elements["grouping_sets"].append(grouping_sets)
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # A bare WITH (or nothing) after the expressions means we've overshot: back up and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # No progress this iteration — nothing more to consume
            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]:
        """Parse CUBE(...) / ROLLUP(...); `with_prefix` marks the `WITH CUBE/ROLLUP` form (no args)."""
        if self._match(TokenType.CUBE):
            kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube
        elif self._match(TokenType.ROLLUP):
            kind = exp.Rollup
        else:
            return None

        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)
        )

    def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]:
        """Parse GROUPING SETS (...); None when the keyword is absent."""
        if self._match(TokenType.GROUPING_SETS):
            return self.expression(
                exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set)
            )
        return None

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        # A grouping set element may itself be GROUPING SETS, CUBE/ROLLUP, or a plain expression
        return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause; None when absent."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_disjunction()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause; None when absent."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_disjunction())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse the CONNECT BY condition with PRIOR temporarily enabled as a prefix parser."""
        # PRIOR is only legal inside the CONNECT BY condition, so register it just for this parse
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_disjunction()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (either clause may come first)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_disjunction()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        # START WITH may also appear after CONNECT BY
        if not start and self._match(TokenType.START_WITH):
            start = self._parse_disjunction()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<name> [AS <expr>]`, used e.g. by INTERPOLATE column lists."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse INTERPOLATE (<name> [AS <expr>], ...); None when absent."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self,
        this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            # ORDER SIBLINGS BY (hierarchical queries) is the only other accepted form
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) introduced by `token`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term: expression, ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_disjunction()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc: t.Optional[bool] = True if self._match(TokenType.DESC) else (False if asc else None)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # When null ordering isn't explicit, derive it from the dialect's NULL_ORDERING policy
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                to=self._match_text_seq("TO") and self._parse_bitwise(),
                step=self._match_text_seq("STEP") and
self._parse_bitwise(),
                interpolate=self._parse_interpolate(),
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]:
        """Parse FETCH/LIMIT trailers: PERCENT, ROW(S) [ONLY], WITH TIES; None if none present."""
        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if not (percent or rows or with_ties):
            return None

        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP / FETCH; returns `this` unchanged when none applies."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP accepts either a bare number or a parenthesized expression
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

            else:
                # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since
                # we try to build an exp.Mod expr.
                # For that matter, we backtrack and instead
                # consume the factor plus parse the percentage separately
                index = self._index
                expression = self._try_parse(self._parse_term)
                if isinstance(expression, exp.Mod):
                    self._retreat(index)
                    expression = self._parse_factor()
                elif not expression:
                    expression = self._parse_factor()
            limit_options = self._parse_limit_options()

            # `LIMIT <offset>, <count>` form: the first number is actually the offset
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = (
                self._prev.text.upper()
                if self._match_set((TokenType.FIRST, TokenType.NEXT))
                else "FIRST"
            )

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse OFFSET <n> [ROW|ROWS]; returns `this` unchanged when absent."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Speculatively check whether the upcoming tokens form a LIMIT/OFFSET (no tokens consumed)."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)

        # MATCH_CONDITION (...)
        # is a special construct that should not be consumed by limit/offset
        if self._next and self._next.token_type == TokenType.MATCH_CONDITION:
            result = False

        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse the ClickHouse-style `BY <exprs>` tail of LIMIT/OFFSET; None when absent."""
        return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse trailing row-locking clauses (FOR UPDATE/SHARE variants), possibly several."""
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            # Optional `OF <tables>` restricting which relations are locked
            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            # NOWAIT -> True, WAIT <n> -> the parsed value, SKIP LOCKED -> False
            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a single UNION/EXCEPT/INTERSECT continuation of `this`; None when absent."""
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # No explicit modifier: fall back to the dialect's per-operation default
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = (
            self._match_text_seq("BY", "NAME")
            or self._match_text_seq("STRICT", "CORRESPONDING")
            or None
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Greedily chain set operations onto `this`, then reattach trailing modifiers if needed."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        # Some dialects attach ORDER BY/LIMIT etc. to the whole set op, not the last operand
        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full (possibly aliased) scalar expression."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment operators (e.g. :=), right-associative via recursion."""
        this = self._parse_disjunction()
        if not this and
self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column on the LHS collapses to its identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        """Parse OR-level boolean expressions."""
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        """Parse AND-level boolean expressions."""
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        """Parse equality-level operators."""
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        """Parse comparison-level operators."""
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (IN, BETWEEN, LIKE, IS [NOT] NULL, ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
5368 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5369 if self._match(TokenType.NOTNULL): 5370 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5371 this = self.expression(exp.Not, this=this) 5372 5373 if negate: 5374 this = self._negate_range(this) 5375 5376 if self._match(TokenType.IS): 5377 this = self._parse_is(this) 5378 5379 return this 5380 5381 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5382 if not this: 5383 return this 5384 5385 return self.expression(exp.Not, this=this) 5386 5387 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5388 index = self._index - 1 5389 negate = self._match(TokenType.NOT) 5390 5391 if self._match_text_seq("DISTINCT", "FROM"): 5392 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5393 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5394 5395 if self._match(TokenType.JSON): 5396 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5397 5398 if self._match_text_seq("WITH"): 5399 _with = True 5400 elif self._match_text_seq("WITHOUT"): 5401 _with = False 5402 else: 5403 _with = None 5404 5405 unique = self._match(TokenType.UNIQUE) 5406 self._match_text_seq("KEYS") 5407 expression: t.Optional[exp.Expression] = self.expression( 5408 exp.JSON, 5409 this=kind, 5410 with_=_with, 5411 unique=unique, 5412 ) 5413 else: 5414 expression = self._parse_null() or self._parse_bitwise() 5415 if not expression: 5416 self._retreat(index) 5417 return None 5418 5419 this = self.expression(exp.Is, this=this, expression=expression) 5420 this = self.expression(exp.Not, this=this) if negate else this 5421 return self._parse_column_ops(this) 5422 5423 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5424 unnest = self._parse_unnest(with_alias=False) 5425 if unnest: 5426 this = self.expression(exp.In, this=this, unnest=unnest) 5427 elif 
self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query operand becomes an IN (<subquery>) rather than an IN (<list>)
            if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query):
                this = self.expression(
                    exp.In,
                    this=this,
                    query=self._parse_query_modifiers(query).subquery(copy=False),
                )
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse [SYMMETRIC|ASYMMETRIC] <low> AND <high> (BETWEEN token already consumed)."""
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse an ESCAPE '<char>' suffix (for LIKE); returns `this` unchanged when absent."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(
            exp.Escape, this=this, expression=self._parse_string() or self._parse_null()
        )

    def _parse_interval_span(self, this: exp.Expression) -> exp.Interval:
        """Parse the unit (or `unit TO unit` span) following an INTERVAL value."""
        # handle day-time format interval span with omitted units:
        # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`>
        interval_span_units_omitted = None
        if (
            this
            and this.is_string
            and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
            and exp.INTERVAL_DAY_TIME_RE.match(this.name)
        ):
            index = self._index

            # Var "TO"
            # Var — lookahead for a `unit TO unit` span, then rewind
            first_unit = self._parse_var(any_token=True, upper=True)
            second_unit = None
            if first_unit and self._match_text_seq("TO"):
                second_unit = self._parse_var(any_token=True, upper=True)

            interval_span_units_omitted = not (first_unit and second_unit)

            self._retreat(index)

        unit = (
            None
            if interval_span_units_omitted
            else (
                self._parse_function()
                or (
                    not self._match_set((TokenType.ALIAS, TokenType.DCOLON), advance=False)
                    and self._parse_var(any_token=True, upper=True)
                )
            )
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan,
                this=unit,
                expression=self._parse_function() or self._parse_var(any_token=True, upper=True),
            )

        return self.expression(exp.Interval, this=this, unit=unit)

    def _parse_interval(self, require_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal; sums of interval parts are folded into exp.Add."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and require_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Reject bare column references that don't look like a valid interval unit
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and self._curr
            and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
        ):
            self._retreat(index)
            return None

        interval = self._parse_interval_span(this)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(exp.Add, this=interval, expression=self._parse_interval(False))

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, plus ||, ?? and the << / >> token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # `a ?? b` — null-coalescing operator
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM) over factors."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR) over unary/exponent expressions."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = self._parse_at_time_zone(parse_method())

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator (e.g. DIV) with no RHS was actually not an operator — back up
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.set("typed", self.dialect.TYPED_DIVISION)
                this.set("safe", self.dialect.SAFE_DIVISION)

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        """Parse exponentiation-level operators (self.EXPONENT)."""
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse a unary prefix operator, or fall through to type/column parsing."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_type()

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an interval, a typed literal (e.g. DATE '...'), or fall back to a column."""
        if interval := parse_interval and self._parse_interval():
            return self._parse_column_ops(interval)

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
5661 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5662 if isinstance(data_type, exp.Cast): 5663 # This constructor can contain ops directly after it, for instance struct unnesting: 5664 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5665 return self._parse_column_ops(data_type) 5666 5667 if data_type: 5668 index2 = self._index 5669 this = self._parse_primary() 5670 5671 if isinstance(this, exp.Literal): 5672 literal = this.name 5673 this = self._parse_column_ops(this) 5674 5675 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5676 if parser: 5677 return parser(self, this, data_type) 5678 5679 if ( 5680 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5681 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5682 and TIME_ZONE_RE.search(literal) 5683 ): 5684 data_type = exp.DataType.build("TIMESTAMPTZ") 5685 5686 return self.expression(exp.Cast, this=this, to=data_type) 5687 5688 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5689 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5690 # 5691 # If the index difference here is greater than 1, that means the parser itself must have 5692 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5693 # 5694 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5695 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5696 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5697 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5698 # 5699 # In these cases, we don't really want to return the converted type, but instead retreat 5700 # and try to parse a Column or Identifier in the section below. 
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        if this:
            this = self._parse_column_ops(this)
        if this and self.COLON_IS_VARIANT_EXTRACT:
            this = self._parse_colon_as_variant_extract(this)

        return this

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0)."""
        this = self._parse_type()
        if not this:
            return None

        # Bare identifiers used as size params (e.g. MAX) become uppercased vars
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a (possibly dotted) user-defined type from an already-parsed identifier."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/struct/enum/aggregate and parameterized forms."""
        index = self._index
        this: t.Optional[exp.Expression] = None

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            # The type keyword may have been tokenized as a plain identifier; re-tokenize to check
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, e.g. INT, ARRAY<INT>, MAP[TEXT => INT], TIMESTAMP WITH TIME ZONE.

        Args:
            check_func: if True, a type followed by an opening paren is only committed to
                when a string literal follows it (see the `maybe_func and check_func` block).
            schema: whether we're in a schema/DDL context; affects how trailing bracketed
                values after the type are interpreted (fixed-size arrays vs. array literals).
            allow_identifiers: whether a plain identifier may be re-tokenized and treated
                as a type name, or as a user-defined type if the dialect supports them.

        Returns:
            The parsed type expression, or None with the token cursor restored.
        """
        index = self._index
        this: t.Optional[exp.Expression] = None

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            # No type token matched — try to reinterpret an identifier as a type name.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    # The identifier re-tokenizes to exactly one type token — use it.
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    # Unconsume the identifier token and bail.
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # NULLABLE(T) is unwrapped into T with a `nullable` flag.
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                # Aggregate types take a function (or bare name) as the first argument.
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized argument list means this may actually be a function call.
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional literal values after the type, e.g. ARRAY<INT>[1, 2].
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_disjunction)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS:
                unit = self._parse_var(upper=True)
                if self._match_text_seq("TO"):
                    # INTERVAL <unit> TO <unit> spans, e.g. INTERVAL YEAR TO MONTH.
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            # Only commit to a type here if a string literal follows (e.g. DATE '2020-01-01');
            # otherwise this was likely a function call, so restore the cursor and bail.
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.name} to unsigned.")

                type_token = unsigned_type_token or type_token

            # NULLABLE without parentheses can be a column (Presto/Trino)
            if type_token == TokenType.NULLABLE and not expressions:
                self._retreat(index)
                return None

            this = exp.DataType(
                this=exp.DataType.Type[type_token.name],
                expressions=expressions,
                nested=nested,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_disjunction) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS
                    or datatype_token == TokenType.ARRAY
                    or not self._match(TokenType.R_BRACKET, advance=False)
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Let dialect-specific converters post-process plain (non-UDT) data types.
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_vector_expressions(
        self, expressions: t.List[exp.Expression]
    ) -> t.List[exp.Expression]:
        """Reinterpret the first VECTOR(...) argument as a data type, keeping the rest."""
        return [exp.DataType.build(expressions[0].name, dialect=self.dialect), *expressions[1:]]
like `STRUCT<list ARRAY<...>>` where the identifier is also a 5996 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5997 this = self._parse_id_var() 5998 else: 5999 this = ( 6000 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6001 or self._parse_id_var() 6002 ) 6003 6004 self._match(TokenType.COLON) 6005 6006 if ( 6007 type_required 6008 and not isinstance(this, exp.DataType) 6009 and not self._match_set(self.TYPE_TOKENS, advance=False) 6010 ): 6011 self._retreat(index) 6012 return self._parse_types() 6013 6014 return self._parse_column_def(this) 6015 6016 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6017 if not self._match_text_seq("AT", "TIME", "ZONE"): 6018 return this 6019 return self._parse_at_time_zone( 6020 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 6021 ) 6022 6023 def _parse_column(self) -> t.Optional[exp.Expression]: 6024 this = self._parse_column_reference() 6025 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 6026 6027 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 6028 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6029 6030 return column 6031 6032 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 6033 this = self._parse_field() 6034 if ( 6035 not this 6036 and self._match(TokenType.VALUES, advance=False) 6037 and self.VALUES_FOLLOWED_BY_PAREN 6038 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6039 ): 6040 this = self._parse_id_var() 6041 6042 if isinstance(this, exp.Identifier): 6043 # We bubble up comments from the Identifier to the Column 6044 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 6045 6046 return this 6047 6048 def _parse_colon_as_variant_extract( 6049 self, this: t.Optional[exp.Expression] 6050 ) -> t.Optional[exp.Expression]: 6051 casts = [] 6052 json_path = [] 6053 escape = None 6054 6055 
while self._match(TokenType.COLON): 6056 start_index = self._index 6057 6058 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 6059 path = self._parse_column_ops( 6060 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 6061 ) 6062 6063 # The cast :: operator has a lower precedence than the extraction operator :, so 6064 # we rearrange the AST appropriately to avoid casting the JSON path 6065 while isinstance(path, exp.Cast): 6066 casts.append(path.to) 6067 path = path.this 6068 6069 if casts: 6070 dcolon_offset = next( 6071 i 6072 for i, t in enumerate(self._tokens[start_index:]) 6073 if t.token_type == TokenType.DCOLON 6074 ) 6075 end_token = self._tokens[start_index + dcolon_offset - 1] 6076 else: 6077 end_token = self._prev 6078 6079 if path: 6080 # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as 6081 # it'll roundtrip to a string literal in GET_PATH 6082 if isinstance(path, exp.Identifier) and path.quoted: 6083 escape = True 6084 6085 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 6086 6087 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 6088 # Databricks transforms it back to the colon/dot notation 6089 if json_path: 6090 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 6091 6092 if json_path_expr: 6093 json_path_expr.set("escape", escape) 6094 6095 this = self.expression( 6096 exp.JSONExtract, 6097 this=this, 6098 expression=json_path_expr, 6099 variant_extract=True, 6100 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6101 ) 6102 6103 while casts: 6104 this = self.expression(exp.Cast, this=this, to=casts.pop()) 6105 6106 return this 6107 6108 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 6109 return self._parse_types() 6110 6111 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> 
t.Optional[exp.Expression]: 6112 this = self._parse_bracket(this) 6113 6114 column_operators = self.COLUMN_OPERATORS 6115 cast_column_operators = self.CAST_COLUMN_OPERATORS 6116 while self._curr: 6117 op_token = self._curr.token_type 6118 6119 if op_token not in column_operators: 6120 break 6121 op = column_operators[op_token] 6122 self._advance() 6123 6124 if op_token in cast_column_operators: 6125 field = self._parse_dcolon() 6126 if not field: 6127 self.raise_error("Expected type") 6128 elif op and self._curr: 6129 field = self._parse_column_reference() or self._parse_bitwise() 6130 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6131 field = self._parse_column_ops(field) 6132 else: 6133 field = self._parse_field(any_token=True, anonymous_func=True) 6134 6135 # Function calls can be qualified, e.g., x.y.FOO() 6136 # This converts the final AST to a series of Dots leading to the function call 6137 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6138 if isinstance(field, (exp.Func, exp.Window)) and this: 6139 this = this.transform( 6140 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6141 ) 6142 6143 if op: 6144 this = op(self, this, field) 6145 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6146 this = self.expression( 6147 exp.Column, 6148 comments=this.comments, 6149 this=field, 6150 table=this.this, 6151 db=this.args.get("table"), 6152 catalog=this.args.get("db"), 6153 ) 6154 elif isinstance(field, exp.Window): 6155 # Move the exp.Dot's to the window's function 6156 window_func = self.expression(exp.Dot, this=this, expression=field.this) 6157 field.set("this", window_func) 6158 this = field 6159 else: 6160 this = self.expression(exp.Dot, this=this, expression=field) 6161 6162 if field and field.comments: 6163 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 6164 6165 this = self._parse_bracket(this) 6166 6167 
return this 6168 6169 def _parse_paren(self) -> t.Optional[exp.Expression]: 6170 if not self._match(TokenType.L_PAREN): 6171 return None 6172 6173 comments = self._prev_comments 6174 query = self._parse_select() 6175 6176 if query: 6177 expressions = [query] 6178 else: 6179 expressions = self._parse_expressions() 6180 6181 this = seq_get(expressions, 0) 6182 6183 if not this and self._match(TokenType.R_PAREN, advance=False): 6184 this = self.expression(exp.Tuple) 6185 elif isinstance(this, exp.UNWRAPPED_QUERIES): 6186 this = self._parse_subquery(this=this, parse_alias=False) 6187 elif isinstance(this, (exp.Subquery, exp.Values)): 6188 this = self._parse_subquery( 6189 this=self._parse_query_modifiers(self._parse_set_operations(this)), 6190 parse_alias=False, 6191 ) 6192 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 6193 this = self.expression(exp.Tuple, expressions=expressions) 6194 else: 6195 this = self.expression(exp.Paren, this=this) 6196 6197 if this: 6198 this.add_comments(comments) 6199 6200 self._match_r_paren(expression=this) 6201 6202 if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc): 6203 return self._parse_window(this) 6204 6205 return this 6206 6207 def _parse_primary(self) -> t.Optional[exp.Expression]: 6208 if self._match_set(self.PRIMARY_PARSERS): 6209 token_type = self._prev.token_type 6210 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 6211 6212 if token_type == TokenType.STRING: 6213 expressions = [primary] 6214 while self._match(TokenType.STRING): 6215 expressions.append(exp.Literal.string(self._prev.text)) 6216 6217 if len(expressions) > 1: 6218 return self.expression( 6219 exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE 6220 ) 6221 6222 return primary 6223 6224 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 6225 return exp.Literal.number(f"0.{self._prev.text}") 6226 6227 return self._parse_paren() 6228 6229 def _parse_field( 6230 self, 6231 any_token: 
bool = False, 6232 tokens: t.Optional[t.Collection[TokenType]] = None, 6233 anonymous_func: bool = False, 6234 ) -> t.Optional[exp.Expression]: 6235 if anonymous_func: 6236 field = ( 6237 self._parse_function(anonymous=anonymous_func, any_token=any_token) 6238 or self._parse_primary() 6239 ) 6240 else: 6241 field = self._parse_primary() or self._parse_function( 6242 anonymous=anonymous_func, any_token=any_token 6243 ) 6244 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6245 6246 def _parse_function( 6247 self, 6248 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6249 anonymous: bool = False, 6250 optional_parens: bool = True, 6251 any_token: bool = False, 6252 ) -> t.Optional[exp.Expression]: 6253 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6254 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6255 fn_syntax = False 6256 if ( 6257 self._match(TokenType.L_BRACE, advance=False) 6258 and self._next 6259 and self._next.text.upper() == "FN" 6260 ): 6261 self._advance(2) 6262 fn_syntax = True 6263 6264 func = self._parse_function_call( 6265 functions=functions, 6266 anonymous=anonymous, 6267 optional_parens=optional_parens, 6268 any_token=any_token, 6269 ) 6270 6271 if fn_syntax: 6272 self._match(TokenType.R_BRACE) 6273 6274 return func 6275 6276 def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]: 6277 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6278 6279 def _parse_function_call( 6280 self, 6281 functions: t.Optional[t.Dict[str, t.Callable]] = None, 6282 anonymous: bool = False, 6283 optional_parens: bool = True, 6284 any_token: bool = False, 6285 ) -> t.Optional[exp.Expression]: 6286 if not self._curr: 6287 return None 6288 6289 comments = self._curr.comments 6290 prev = self._prev 6291 token = self._curr 6292 token_type = self._curr.token_type 6293 this = self._curr.text 6294 upper = this.upper() 6295 6296 parser = 
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly paren-less) function call at the current token.

        Args:
            functions: name -> builder overrides; defaults to self.FUNCTIONS.
            anonymous: if True, build exp.Anonymous even for known function names.
            optional_parens: allow paren-less functions (e.g. CURRENT_DATE).
            any_token: accept any non-reserved token as a function name.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        # Paren-less constructs with dedicated parsers, e.g. CASE.
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            # No parens follow: only paren-less builtins are still candidates.
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren.
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Builders that declare a `dialect` parameter get the active dialect.
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)

            this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        """Hook for subclasses to convert an argument into a PropertyEQ; no-op here."""
        return expression
t.List[exp.Expression], parse_map: bool = False 6390 ) -> t.List[exp.Expression]: 6391 transformed = [] 6392 6393 for index, e in enumerate(expressions): 6394 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 6395 if isinstance(e, exp.Alias): 6396 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 6397 6398 if not isinstance(e, exp.PropertyEQ): 6399 e = self.expression( 6400 exp.PropertyEQ, 6401 this=e.this if parse_map else exp.to_identifier(e.this.name), 6402 expression=e.expression, 6403 ) 6404 6405 if isinstance(e.this, exp.Column): 6406 e.this.replace(e.this.this) 6407 else: 6408 e = self._to_prop_eq(e, index) 6409 6410 transformed.append(e) 6411 6412 return transformed 6413 6414 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 6415 return self._parse_statement() 6416 6417 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 6418 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 6419 6420 def _parse_user_defined_function( 6421 self, kind: t.Optional[TokenType] = None 6422 ) -> t.Optional[exp.Expression]: 6423 this = self._parse_table_parts(schema=True) 6424 6425 if not self._match(TokenType.L_PAREN): 6426 return this 6427 6428 expressions = self._parse_csv(self._parse_function_parameter) 6429 self._match_r_paren() 6430 return self.expression( 6431 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 6432 ) 6433 6434 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 6435 literal = self._parse_primary() 6436 if literal: 6437 return self.expression(exp.Introducer, token=token, expression=literal) 6438 6439 return self._identifier_expression(token) 6440 6441 def _parse_session_parameter(self) -> exp.SessionParameter: 6442 kind = None 6443 this = self._parse_id_var() or self._parse_primary() 6444 6445 if this and self._match(TokenType.DOT): 6446 kind = this.name 6447 this = self._parse_var() or self._parse_primary() 
6448 6449 return self.expression(exp.SessionParameter, this=this, kind=kind) 6450 6451 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 6452 return self._parse_id_var() 6453 6454 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 6455 index = self._index 6456 6457 if self._match(TokenType.L_PAREN): 6458 expressions = t.cast( 6459 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 6460 ) 6461 6462 if not self._match(TokenType.R_PAREN): 6463 self._retreat(index) 6464 else: 6465 expressions = [self._parse_lambda_arg()] 6466 6467 if self._match_set(self.LAMBDAS): 6468 return self.LAMBDAS[self._prev.token_type](self, expressions) 6469 6470 self._retreat(index) 6471 6472 this: t.Optional[exp.Expression] 6473 6474 if self._match(TokenType.DISTINCT): 6475 this = self.expression( 6476 exp.Distinct, expressions=self._parse_csv(self._parse_disjunction) 6477 ) 6478 else: 6479 this = self._parse_select_or_expression(alias=alias) 6480 6481 return self._parse_limit( 6482 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6483 ) 6484 6485 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6486 index = self._index 6487 if not self._match(TokenType.L_PAREN): 6488 return this 6489 6490 # Disambiguate between schema and subquery/CTE, e.g. 
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized list of column defs/constraints into exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        """Parse a field and its column definition (type + constraints)."""
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse the type and constraints following a column name into exp.ColumnDef.

        Returns `this` unchanged when neither a type nor any constraint follows.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. ClickHouse's `x ALIAS expr` / `x MATERIALIZED expr`.
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_disjunction(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False):
            # Procedure-style IN/OUT parameter markers before the type.
            in_out_constraint = self.expression(
                exp.InOutColumnConstraint,
                input_=self._match(TokenType.IN),
                output=self._match(TokenType.OUT),
            )
            constraints.append(in_out_constraint)
            kind = self._parse_types()
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            # Typed computed column, e.g. `x INT AS (expr) [STORED | VIRTUAL]`.
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT, optionally with (start, increment) or START/INCREMENT."""
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_check_constraint(self) -> t.Optional[exp.CheckColumnConstraint]:
        """Parse a parenthesized CHECK constraint body, optionally followed by ENFORCED."""
        if not self._match(TokenType.L_PAREN, advance=False):
            return None

        return self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        )
    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        """Parse the REFRESH part of an AUTO REFRESH property (AUTO was already consumed)."""
        if not self._match_text_seq("REFRESH"):
            # AUTO wasn't followed by REFRESH: unconsume it so other parsers can retry.
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS column constraint: COMPRESS (<csv>) or COMPRESS <expr>."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {BY DEFAULT | ALWAYS} AS ... column constraints.

        Handles identity columns with optional sequence options
        (START WITH / INCREMENT BY / MINVALUE / MAXVALUE / [NO] CYCLE),
        GENERATED AS ROW {START | END} [HIDDEN], and computed-expression columns.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW START/END [HIDDEN] (system-versioned tables)
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # No IDENTITY keyword: the parenthesized part is a computed expression.
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form: IDENTITY(start[, increment])
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        """Parse an INLINE [LENGTH] <expr> column constraint."""
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse NOT NULL / NOT CASESPECIFIC / NOT FOR REPLICATION; NOT is already consumed."""
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one (optionally named) column constraint via CONSTRAINT_PARSERS."""
        this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None

        # Don't confuse T-SQL procedure options (WITH <option>) with a WITH-prefixed
        # constraint; peek at the next token without advancing.
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)
            if not constraint:
                # The sub-parser bailed: unconsume the constraint keyword.
                self._retreat(self._index - 1)
                return None

            return self.expression(exp.ColumnConstraint, this=this, kind=constraint)

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse CONSTRAINT <name> <constraints...>, or fall back to an unnamed constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint keyword; retreat fully if parsing fails.

        `constraints` optionally restricts which keywords are accepted (defaults to
        all of CONSTRAINT_PARSERS).
        """
        index = self._index

        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        constraint = self.CONSTRAINT_PARSERS[constraint](self)
        if not constraint:
            self._retreat(index)

        return constraint

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        """Parse one column reference inside a UNIQUE constraint's column list."""
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse UNIQUE [KEY | INDEX] [NULLS NOT DISTINCT] [(cols)] [USING <type>] ..."""
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect trailing key-constraint options as strings.

        Handles ON <kw> {NO ACTION | CASCADE | RESTRICT | SET NULL | SET DEFAULT}
        plus any keywords listed in KEY_CONSTRAINT_OPTIONS; stops at the first
        token that matches neither.
        """
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                # The keyword after ON (e.g. DELETE/UPDATE) is taken verbatim.
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse REFERENCES <table>[(cols)] [options]; `match=False` skips the keyword check."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        # The referenced columns are parsed into the table via schema=True below,
        # so `expressions` stays None here.
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY [(cols)] REFERENCES ... [ON DELETE/UPDATE <action>]..."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                # Any other single keyword (e.g. CASCADE, RESTRICT) is taken verbatim.
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        """Parse one element of a PRIMARY KEY column list."""
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY, as either a column constraint or a table-level key.

        Args:
            wrapped_optional: allow the column list's parentheses to be optional.
            in_props: treat this as a table-level key even without a following "(".
            named_primary_key: allow an identifier name before the column list.
        """
        desc = (
            self._prev.token_type == TokenType.DESC
            if self._match_set((TokenType.ASC, TokenType.DESC))
            else None
        )

        this = None
        if (
            named_primary_key
            and self._curr.text.upper() not in self.CONSTRAINT_PARSERS
            and self._next
            and self._next.token_type == TokenType.L_PAREN
        ):
            this = self._parse_id_var()

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # No column list follows: this is a per-column PRIMARY KEY constraint.
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            this=this,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        """Parse one element inside brackets/braces (value, alias, or slice)."""
        return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] / {...}: subscripts, array literals, structs, or ODBC datetime literals.

        Recurses on itself at the end to consume chained subscripts (a[1][2]).
        """
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            # Look two tokens back to see whether this brace was preceded by MAP.
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            # Standalone [...] with nothing before it: an array literal.
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Otherwise it's a subscript; normalize indices by the dialect's offset.
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the [start]:[end][:step] slice syntax following `this`."""
        if not self._match(TokenType.COLON):
            return this

        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
            # `x[a:-:s]`-style: a bare dash before the next colon means -1.
            self._advance()
            end: t.Optional[exp.Expression] = -exp.Literal.number("1")
        else:
            end = self._parse_assignment()
        step = self._parse_unary() if self._match(TokenType.COLON) else None
        return self.expression(exp.Slice, this=this, expression=end, step=step)

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse CASE [operand] WHEN ... THEN ... [ELSE ...] END."""
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_disjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_disjunction()
            self._match(TokenType.THEN)
            then = self._parse_disjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_disjunction()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                # `ELSE interval END` was mis-parsed as an INTERVAL literal; the END
                # actually closes the CASE and `interval` is a plain column.
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF, in function form IF(cond, t[, f]) or statement form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is a command (e.g. IF EXISTS ...) in some dialects.
                return self._parse_as_command(self._prev)

            condition = self._parse_disjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_disjunction()
            false = self._parse_disjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]; NEXT is already consumed."""
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(part FROM expr) — the comma form is accepted as well."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_char(self) -> exp.Chr:
        """Parse CHAR(<codes...>) [USING <charset>]."""
        return self.expression(
            exp.Chr,
            expressions=self._parse_csv(self._parse_assignment),
            charset=self._match(TokenType.USING) and self._parse_var(),
        )

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the interior of CAST(expr AS type [FORMAT fmt] [DEFAULT ... ON CONVERSION ERROR]).

        Args:
            strict: whether to build a strict cast (vs TRY_CAST semantics).
            safe: whether the cast should null out on failure (dialect-dependent).
        """
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(expr, 'type string') variant
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = None
        if self._match(TokenType.DEFAULT):
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # CAST(... AS DATE/TIMESTAMP FORMAT ...) canonicalizes to STR_TO_DATE/TIME.
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier in type position is a user-defined type.
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG / GROUP_CONCAT-style aggregates into exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_disjunction()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_disjunction))
        else:
            args = self._parse_csv(self._parse_disjunction)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(expr USING charset) or CONVERT(expr, type) into a cast."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY})
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_element(self) -> exp.XMLElement:
        """Parse XMLELEMENT([NAME] <id> | EVALNAME <expr> [, args...])."""
        if self._match_text_seq("EVALNAME"):
            evalname = True
            this = self._parse_bitwise()
        else:
            evalname = None
            self._match_text_seq("NAME")
            this = self._parse_id_var()

        return self.expression(
            exp.XMLElement,
            this=this,
            expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise),
            evalname=evalname,
        )
    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE(...): optional XMLNAMESPACES, a row-pattern string, PASSING
        columns, RETURNING SEQUENCE BY REF, and an optional COLUMNS clause."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list; DEFAULT entries carry no alias."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """Parse DECODE(...): fewer than 3 args is the charset-decode form (exp.Decode),
        3+ args is the Oracle CASE-like form (exp.DecodeCase)."""
        args = self._parse_csv(self._parse_disjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a [KEY] <key> <separator> [VALUE] <value> pair; None if neither side parses."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when it is followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the ON EMPTY / ON ERROR / ON NULL handling clauses of JSON functions."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        """Parse one handling clause for condition `on`.

        Returns the matched "<value> ON <on>" string, the expression from
        "DEFAULT <expr> ON <on>", or None when nothing matches.
        """
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            # DEFAULT wasn't part of this clause; roll back entirely.
            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
7340 7341 def _parse_json_object(self, agg=False): 7342 star = self._parse_star() 7343 expressions = ( 7344 [star] 7345 if star 7346 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 7347 ) 7348 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 7349 7350 unique_keys = None 7351 if self._match_text_seq("WITH", "UNIQUE"): 7352 unique_keys = True 7353 elif self._match_text_seq("WITHOUT", "UNIQUE"): 7354 unique_keys = False 7355 7356 self._match_text_seq("KEYS") 7357 7358 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 7359 self._parse_type() 7360 ) 7361 encoding = self._match_text_seq("ENCODING") and self._parse_var() 7362 7363 return self.expression( 7364 exp.JSONObjectAgg if agg else exp.JSONObject, 7365 expressions=expressions, 7366 null_handling=null_handling, 7367 unique_keys=unique_keys, 7368 return_type=return_type, 7369 encoding=encoding, 7370 ) 7371 7372 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 7373 def _parse_json_column_def(self) -> exp.JSONColumnDef: 7374 if not self._match_text_seq("NESTED"): 7375 this = self._parse_id_var() 7376 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 7377 kind = self._parse_types(allow_identifiers=False) 7378 nested = None 7379 else: 7380 this = None 7381 ordinality = None 7382 kind = None 7383 nested = True 7384 7385 path = self._match_text_seq("PATH") and self._parse_string() 7386 nested_schema = nested and self._parse_json_schema() 7387 7388 return self.expression( 7389 exp.JSONColumnDef, 7390 this=this, 7391 kind=kind, 7392 path=path, 7393 nested_schema=nested_schema, 7394 ordinality=ordinality, 7395 ) 7396 7397 def _parse_json_schema(self) -> exp.JSONSchema: 7398 self._match_text_seq("COLUMNS") 7399 return self.expression( 7400 exp.JSONSchema, 7401 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 7402 ) 7403 7404 def _parse_json_table(self) -> 
exp.JSONTable: 7405 this = self._parse_format_json(self._parse_bitwise()) 7406 path = self._match(TokenType.COMMA) and self._parse_string() 7407 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 7408 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 7409 schema = self._parse_json_schema() 7410 7411 return exp.JSONTable( 7412 this=this, 7413 schema=schema, 7414 path=path, 7415 error_handling=error_handling, 7416 empty_handling=empty_handling, 7417 ) 7418 7419 def _parse_match_against(self) -> exp.MatchAgainst: 7420 if self._match_text_seq("TABLE"): 7421 # parse SingleStore MATCH(TABLE ...) syntax 7422 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 7423 expressions = [] 7424 table = self._parse_table() 7425 if table: 7426 expressions = [table] 7427 else: 7428 expressions = self._parse_csv(self._parse_column) 7429 7430 self._match_text_seq(")", "AGAINST", "(") 7431 7432 this = self._parse_string() 7433 7434 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 7435 modifier = "IN NATURAL LANGUAGE MODE" 7436 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7437 modifier = f"{modifier} WITH QUERY EXPANSION" 7438 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 7439 modifier = "IN BOOLEAN MODE" 7440 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 7441 modifier = "WITH QUERY EXPANSION" 7442 else: 7443 modifier = None 7444 7445 return self.expression( 7446 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 7447 ) 7448 7449 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 7450 def _parse_open_json(self) -> exp.OpenJSON: 7451 this = self._parse_bitwise() 7452 path = self._match(TokenType.COMMA) and self._parse_string() 7453 7454 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 7455 this = self._parse_field(any_token=True) 7456 kind = self._parse_types() 7457 path = self._parse_string() 
7458 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 7459 7460 return self.expression( 7461 exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 7462 ) 7463 7464 expressions = None 7465 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 7466 self._match_l_paren() 7467 expressions = self._parse_csv(_parse_open_json_column_def) 7468 7469 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 7470 7471 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 7472 args = self._parse_csv(self._parse_bitwise) 7473 7474 if self._match(TokenType.IN): 7475 return self.expression( 7476 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 7477 ) 7478 7479 if haystack_first: 7480 haystack = seq_get(args, 0) 7481 needle = seq_get(args, 1) 7482 else: 7483 haystack = seq_get(args, 1) 7484 needle = seq_get(args, 0) 7485 7486 return self.expression( 7487 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 7488 ) 7489 7490 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 7491 args = self._parse_csv(self._parse_table) 7492 return exp.JoinHint(this=func_name.upper(), expressions=args) 7493 7494 def _parse_substring(self) -> exp.Substring: 7495 # Postgres supports the form: substring(string [from int] [for int]) 7496 # (despite being undocumented, the reverse order also works) 7497 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 7498 7499 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 7500 7501 start, length = None, None 7502 7503 while self._curr: 7504 if self._match(TokenType.FROM): 7505 start = self._parse_bitwise() 7506 elif self._match(TokenType.FOR): 7507 if not start: 7508 start = exp.Literal.number(1) 7509 length = self._parse_bitwise() 7510 else: 7511 break 7512 7513 if start: 7514 args.append(start) 7515 if length: 7516 args.append(length) 7517 7518 return 
self.validate_expression(exp.Substring.from_arg_list(args), args) 7519 7520 def _parse_trim(self) -> exp.Trim: 7521 # https://www.w3resource.com/sql/character-functions/trim.php 7522 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 7523 7524 position = None 7525 collation = None 7526 expression = None 7527 7528 if self._match_texts(self.TRIM_TYPES): 7529 position = self._prev.text.upper() 7530 7531 this = self._parse_bitwise() 7532 if self._match_set((TokenType.FROM, TokenType.COMMA)): 7533 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 7534 expression = self._parse_bitwise() 7535 7536 if invert_order: 7537 this, expression = expression, this 7538 7539 if self._match(TokenType.COLLATE): 7540 collation = self._parse_bitwise() 7541 7542 return self.expression( 7543 exp.Trim, this=this, position=position, expression=expression, collation=collation 7544 ) 7545 7546 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 7547 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 7548 7549 def _parse_named_window(self) -> t.Optional[exp.Expression]: 7550 return self._parse_window(self._parse_id_var(), alias=True) 7551 7552 def _parse_respect_or_ignore_nulls( 7553 self, this: t.Optional[exp.Expression] 7554 ) -> t.Optional[exp.Expression]: 7555 if self._match_text_seq("IGNORE", "NULLS"): 7556 return self.expression(exp.IgnoreNulls, this=this) 7557 if self._match_text_seq("RESPECT", "NULLS"): 7558 return self.expression(exp.RespectNulls, this=this) 7559 return this 7560 7561 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7562 if self._match(TokenType.HAVING): 7563 self._match_texts(("MAX", "MIN")) 7564 max = self._prev.text.upper() != "MIN" 7565 return self.expression( 7566 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7567 ) 7568 7569 return this 7570 7571 def _parse_window( 7572 self, this: 
t.Optional[exp.Expression], alias: bool = False 7573 ) -> t.Optional[exp.Expression]: 7574 func = this 7575 comments = func.comments if isinstance(func, exp.Expression) else None 7576 7577 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 7578 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 7579 if self._match_text_seq("WITHIN", "GROUP"): 7580 order = self._parse_wrapped(self._parse_order) 7581 this = self.expression(exp.WithinGroup, this=this, expression=order) 7582 7583 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 7584 self._match(TokenType.WHERE) 7585 this = self.expression( 7586 exp.Filter, this=this, expression=self._parse_where(skip_where_token=True) 7587 ) 7588 self._match_r_paren() 7589 7590 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 7591 # Some dialects choose to implement and some do not. 7592 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 7593 7594 # There is some code above in _parse_lambda that handles 7595 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 7596 7597 # The below changes handle 7598 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 7599 7600 # Oracle allows both formats 7601 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 7602 # and Snowflake chose to do the same for familiarity 7603 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 7604 if isinstance(this, exp.AggFunc): 7605 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 7606 7607 if ignore_respect and ignore_respect is not this: 7608 ignore_respect.replace(ignore_respect.this) 7609 this = self.expression(ignore_respect.__class__, this=this) 7610 7611 this = self._parse_respect_or_ignore_nulls(this) 7612 7613 # bigquery select from window x AS (partition by ...) 
7614 if alias: 7615 over = None 7616 self._match(TokenType.ALIAS) 7617 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 7618 return this 7619 else: 7620 over = self._prev.text.upper() 7621 7622 if comments and isinstance(func, exp.Expression): 7623 func.pop_comments() 7624 7625 if not self._match(TokenType.L_PAREN): 7626 return self.expression( 7627 exp.Window, 7628 comments=comments, 7629 this=this, 7630 alias=self._parse_id_var(False), 7631 over=over, 7632 ) 7633 7634 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 7635 7636 first: t.Optional[bool] = True if self._match(TokenType.FIRST) else None 7637 if self._match_text_seq("LAST"): 7638 first = False 7639 7640 partition, order = self._parse_partition_and_order() 7641 kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text 7642 7643 if kind: 7644 self._match(TokenType.BETWEEN) 7645 start = self._parse_window_spec() 7646 7647 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 7648 exclude = ( 7649 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 7650 if self._match_text_seq("EXCLUDE") 7651 else None 7652 ) 7653 7654 spec = self.expression( 7655 exp.WindowSpec, 7656 kind=kind, 7657 start=start["value"], 7658 start_side=start["side"], 7659 end=end.get("value"), 7660 end_side=end.get("side"), 7661 exclude=exclude, 7662 ) 7663 else: 7664 spec = None 7665 7666 self._match_r_paren() 7667 7668 window = self.expression( 7669 exp.Window, 7670 comments=comments, 7671 this=this, 7672 partition_by=partition, 7673 order=order, 7674 spec=spec, 7675 alias=window_alias, 7676 over=over, 7677 first=first, 7678 ) 7679 7680 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            # Another OVER-introducing token follows: recurse so the next
            # window clause wraps the one we just built.
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound of a window spec (e.g. UNBOUNDED PRECEDING)."""
        self._match(TokenType.BETWEEN)

        return {
            # Either a keyword bound (UNBOUNDED / CURRENT ROW) or an arbitrary
            # numeric/bitwise expression such as `5` in `5 PRECEDING`.
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            # The bound's side, e.g. PRECEDING / FOLLOWING (see WINDOW_SIDES).
            "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Attach an (optionally AS-introduced) alias to `this`.

        When `explicit` is True, an alias is only parsed if the AS token is
        present; otherwise a bare identifier can also act as an alias.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            # Multiple aliases, e.g. `t AS (a, b, c)`
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier-like name; with `any_token`, almost any
        non-reserved token can serve as an identifier."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder token."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal but represent it as a quoted identifier."""
        output = exp.to_identifier(
            self._prev.text if self._match(TokenType.STRING) else None, quoted=True
        )
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder token."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse an explicitly quoted identifier token."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or, with `any_token`, any non-reserved token)
        into an exp.Var, optionally upper-casing its text."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it is reserved
        (or `ignore_reserved` is set); return None without advancing otherwise."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, else any token as an exp.Var."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, else any token as an exp.Var."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse NULL (or UNKNOWN, which is treated like NULL here)."""
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse the `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse a parameter name (identifier, primary, or bare var)."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder token (e.g. `?`); rewinds if the registered
        parser matched the token but produced nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse star modifiers such as EXCEPT/REPLACE/RENAME: either a
        parenthesized CSV of expressions or a single aliased expression."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_disjunction(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, dropping Nones."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments that trail the separator to the prior item.
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold binary operators from `expressions`
        (token type -> expression class) over operands from `parse_method`."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated identifier list."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list via `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless
        `optional` is True."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse either a (possibly aliased) scalar expression with set
        operations, or a full SELECT statement."""
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT body of a DDL statement (e.g. CTAS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional transaction modes."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR/NOT tokens, e.g. `READ ONLY`, `NOT DEFERRABLE`.
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT [AND [NO] CHAIN] or ROLLBACK [TO SAVEPOINT name]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh | exp.Command:
        """Parse REFRESH [TABLE | MATERIALIZED VIEW] <target>."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match_text_seq("MATERIALIZED", "VIEW"):
            kind = "MATERIALIZED VIEW"
        else:
            kind = ""

        this = self._parse_string() or self._parse_table()
        if not kind and not isinstance(this, exp.Literal):
            # Without an explicit kind and a literal target, fall back to an
            # opaque Command so the original SQL is preserved.
            return self._parse_as_command(self._prev)

        return self.expression(exp.Refresh, this=this, kind=kind)

    def _parse_column_def_with_exists(self) -> t.Optional[exp.ColumnDef]:
        """Parse `[COLUMN] [IF NOT EXISTS] <column def>`, rewinding entirely
        if no ColumnDef results."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse the column definition of an ALTER TABLE ... ADD clause;
        the ADD keyword must already have been consumed."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] clause inside ALTER TABLE, defaulting the
        drop kind to COLUMN."""
        drop = self._parse_drop() if self._match(TokenType.DROP) else None
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the payload of ALTER TABLE ... ADD: constraints, columns,
        or partitions, depending on what follows."""

        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One comma-separated ADD item: constraint, column, or partition.
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # Plain column additions, possibly as a parenthesized schema.
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse ALTER TABLE ... ALTER [COLUMN] and its many sub-forms."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] TYPE <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_disjunction(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse Redshift-style ALTER ... DISTSTYLE (ALL/EVEN/AUTO or KEY col)."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse Redshift-style ALTER ... [COMPOUND] SORTKEY."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse ALTER TABLE ... DROP: either partitions or columns."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse column drops instead.
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME (table rename or column rename)."""
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or not to or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse ALTER TABLE ... SET and its dialect-specific variants
        (properties, location, file format, tags, serde, etc.)."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style fallback: optional SERDE followed by properties.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement; unsupported forms fall back to Command."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            # ALTER SESSION has no target table/cluster.
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)
            cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

            # Only build an Alter node if the whole statement was consumed.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                    cascade=cascade,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse ANALYZE across dialects (DuckDB, MySQL, Presto, StarRocks...)."""
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            # Partition keywords without a parseable partition: bail out to Command.
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # self._index - 2 points back at the SYNC/ASYNC token just consumed.
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse the [COMPUTE|...] [DELTA] STATISTICS clause of ANALYZE."""
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            # SAMPLE <n> [PERCENT] clause of ANALYZE ... STATISTICS.
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE REF UPDATE / VALIDATE STRUCTURE."""
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse an ANALYZE `<verb> COLUMNS` sub-clause."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse ANALYZE ... DELETE [SYSTEM] STATISTICS."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse Oracle's ANALYZE ... LIST CHAINED ROWS [INTO ...]."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse MySQL/StarRocks ANALYZE ... UPDATE/DROP HISTOGRAM clauses."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # WITH <n> BUCKETS
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        """Parse MERGE [INTO] target USING source ON ... WHEN ... clauses."""
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=self._match(TokenType.ON) and self._parse_disjunction(),
            using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(),
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )

    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE/TARGET] ... THEN branches
        of a MERGE statement."""
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # source is False for BY TARGET, True for BY SOURCE, None for neither.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_disjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse a SHOW statement via the registered SHOW parsers."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` item of a SET statement."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS)

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            # Bare names on the right-hand side are values, not references.
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            global_=global_,
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item, dispatching to a registered parser if any."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; if tokens remain unconsumed, fall back to
        an opaque Command."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) keyword option described by
        `options` (first word -> allowed continuation sequences)."""
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; an empty tuple means the bare word is valid.
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim into an exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword (start token) from the remainder.
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: KIND(（key value)...)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        """Parse a dictionary range property: (MIN x MAX y) or (MAX y)."""
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            # MIN defaults to 0 when only MAX is given.
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        """Parse a list comprehension tail: `<expr> [, pos] IN <iter> [IF cond]`."""
        index = self._index
expression = self._parse_column() 8615 position = self._match(TokenType.COMMA) and self._parse_column() 8616 8617 if not self._match(TokenType.IN): 8618 self._retreat(index - 1) 8619 return None 8620 iterator = self._parse_column() 8621 condition = self._parse_disjunction() if self._match_text_seq("IF") else None 8622 return self.expression( 8623 exp.Comprehension, 8624 this=this, 8625 expression=expression, 8626 position=position, 8627 iterator=iterator, 8628 condition=condition, 8629 ) 8630 8631 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8632 if self._match(TokenType.HEREDOC_STRING): 8633 return self.expression(exp.Heredoc, this=self._prev.text) 8634 8635 if not self._match_text_seq("$"): 8636 return None 8637 8638 tags = ["$"] 8639 tag_text = None 8640 8641 if self._is_connected(): 8642 self._advance() 8643 tags.append(self._prev.text.upper()) 8644 else: 8645 self.raise_error("No closing $ found") 8646 8647 if tags[-1] != "$": 8648 if self._is_connected() and self._match_text_seq("$"): 8649 tag_text = tags[-1] 8650 tags.append("$") 8651 else: 8652 self.raise_error("No closing $ found") 8653 8654 heredoc_start = self._curr 8655 8656 while self._curr: 8657 if self._match_text_seq(*tags, advance=False): 8658 this = self._find_sql(heredoc_start, self._prev) 8659 self._advance(len(tags)) 8660 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8661 8662 self._advance() 8663 8664 self.raise_error(f"No closing {''.join(tags)} found") 8665 return None 8666 8667 def _find_parser( 8668 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8669 ) -> t.Optional[t.Callable]: 8670 if not self._curr: 8671 return None 8672 8673 index = self._index 8674 this = [] 8675 while True: 8676 # The current token might be multiple words 8677 curr = self._curr.text.upper() 8678 key = curr.split(" ") 8679 this.append(curr) 8680 8681 self._advance() 8682 result, trie = in_trie(trie, key) 8683 if result == TrieResult.FAILED: 8684 break 8685 8686 if result == 
TrieResult.EXISTS: 8687 subparser = parsers[" ".join(this)] 8688 return subparser 8689 8690 self._retreat(index) 8691 return None 8692 8693 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8694 if not self._match(TokenType.L_PAREN, expression=expression): 8695 self.raise_error("Expecting (") 8696 8697 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8698 if not self._match(TokenType.R_PAREN, expression=expression): 8699 self.raise_error("Expecting )") 8700 8701 def _replace_lambda( 8702 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8703 ) -> t.Optional[exp.Expression]: 8704 if not node: 8705 return node 8706 8707 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8708 8709 for column in node.find_all(exp.Column): 8710 typ = lambda_types.get(column.parts[0].name) 8711 if typ is not None: 8712 dot_or_id = column.to_dot() if column.table else column.this 8713 8714 if typ: 8715 dot_or_id = self.expression( 8716 exp.Cast, 8717 this=dot_or_id, 8718 to=typ, 8719 ) 8720 8721 parent = column.parent 8722 8723 while isinstance(parent, exp.Dot): 8724 if not isinstance(parent.parent, exp.Dot): 8725 parent.replace(dot_or_id) 8726 break 8727 parent = parent.parent 8728 else: 8729 if column is node: 8730 node = dot_or_id 8731 else: 8732 column.replace(dot_or_id) 8733 return node 8734 8735 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8736 start = self._prev 8737 8738 # Not to be confused with TRUNCATE(number, decimals) function call 8739 if self._match(TokenType.L_PAREN): 8740 self._retreat(self._index - 2) 8741 return self._parse_function() 8742 8743 # Clickhouse supports TRUNCATE DATABASE as well 8744 is_database = self._match(TokenType.DATABASE) 8745 8746 self._match(TokenType.TABLE) 8747 8748 exists = self._parse_exists(not_=False) 8749 8750 expressions = self._parse_csv( 8751 lambda: self._parse_table(schema=True, 
is_db_reference=is_database) 8752 ) 8753 8754 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8755 8756 if self._match_text_seq("RESTART", "IDENTITY"): 8757 identity = "RESTART" 8758 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8759 identity = "CONTINUE" 8760 else: 8761 identity = None 8762 8763 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8764 option = self._prev.text 8765 else: 8766 option = None 8767 8768 partition = self._parse_partition() 8769 8770 # Fallback case 8771 if self._curr: 8772 return self._parse_as_command(start) 8773 8774 return self.expression( 8775 exp.TruncateTable, 8776 expressions=expressions, 8777 is_database=is_database, 8778 exists=exists, 8779 cluster=cluster, 8780 identity=identity, 8781 option=option, 8782 partition=partition, 8783 ) 8784 8785 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8786 this = self._parse_ordered(self._parse_opclass) 8787 8788 if not self._match(TokenType.WITH): 8789 return this 8790 8791 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 8792 8793 return self.expression(exp.WithOperator, this=this, op=op) 8794 8795 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8796 self._match(TokenType.EQ) 8797 self._match(TokenType.L_PAREN) 8798 8799 opts: t.List[t.Optional[exp.Expression]] = [] 8800 option: exp.Expression | None 8801 while self._curr and not self._match(TokenType.R_PAREN): 8802 if self._match_text_seq("FORMAT_NAME", "="): 8803 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8804 option = self._parse_format_name() 8805 else: 8806 option = self._parse_property() 8807 8808 if option is None: 8809 self.raise_error("Unable to parse option") 8810 break 8811 8812 opts.append(option) 8813 8814 return opts 8815 8816 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8817 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8818 8819 options = [] 8820 
while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8821 option = self._parse_var(any_token=True) 8822 prev = self._prev.text.upper() 8823 8824 # Different dialects might separate options and values by white space, "=" and "AS" 8825 self._match(TokenType.EQ) 8826 self._match(TokenType.ALIAS) 8827 8828 param = self.expression(exp.CopyParameter, this=option) 8829 8830 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8831 TokenType.L_PAREN, advance=False 8832 ): 8833 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8834 param.set("expressions", self._parse_wrapped_options()) 8835 elif prev == "FILE_FORMAT": 8836 # T-SQL's external file format case 8837 param.set("expression", self._parse_field()) 8838 elif ( 8839 prev == "FORMAT" 8840 and self._prev.token_type == TokenType.ALIAS 8841 and self._match_texts(("AVRO", "JSON")) 8842 ): 8843 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8844 param.set("expression", self._parse_field()) 8845 else: 8846 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8847 8848 options.append(param) 8849 8850 if sep: 8851 self._match(sep) 8852 8853 return options 8854 8855 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8856 expr = self.expression(exp.Credentials) 8857 8858 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8859 expr.set("storage", self._parse_field()) 8860 if self._match_text_seq("CREDENTIALS"): 8861 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8862 creds = ( 8863 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8864 ) 8865 expr.set("credentials", creds) 8866 if self._match_text_seq("ENCRYPTION"): 8867 expr.set("encryption", self._parse_wrapped_options()) 8868 if self._match_text_seq("IAM_ROLE"): 8869 expr.set( 8870 "iam_role", 8871 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 8872 ) 8873 if 
self._match_text_seq("REGION"): 8874 expr.set("region", self._parse_field()) 8875 8876 return expr 8877 8878 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8879 return self._parse_field() 8880 8881 def _parse_copy(self) -> exp.Copy | exp.Command: 8882 start = self._prev 8883 8884 self._match(TokenType.INTO) 8885 8886 this = ( 8887 self._parse_select(nested=True, parse_subquery_alias=False) 8888 if self._match(TokenType.L_PAREN, advance=False) 8889 else self._parse_table(schema=True) 8890 ) 8891 8892 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 8893 8894 files = self._parse_csv(self._parse_file_location) 8895 if self._match(TokenType.EQ, advance=False): 8896 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 8897 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 8898 # list via `_parse_wrapped(..)` below. 8899 self._advance(-1) 8900 files = [] 8901 8902 credentials = self._parse_credentials() 8903 8904 self._match_text_seq("WITH") 8905 8906 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 8907 8908 # Fallback case 8909 if self._curr: 8910 return self._parse_as_command(start) 8911 8912 return self.expression( 8913 exp.Copy, 8914 this=this, 8915 kind=kind, 8916 credentials=credentials, 8917 files=files, 8918 params=params, 8919 ) 8920 8921 def _parse_normalize(self) -> exp.Normalize: 8922 return self.expression( 8923 exp.Normalize, 8924 this=self._parse_bitwise(), 8925 form=self._match(TokenType.COMMA) and self._parse_var(), 8926 ) 8927 8928 def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor: 8929 args = self._parse_csv(lambda: self._parse_lambda()) 8930 8931 this = seq_get(args, 0) 8932 decimals = seq_get(args, 1) 8933 8934 return expr_type( 8935 this=this, 8936 decimals=decimals, 8937 to=self._parse_var() if self._match_text_seq("TO") else None, 8938 ) 8939 8940 def _parse_star_ops(self) -> t.Optional[exp.Expression]: 
8941 star_token = self._prev 8942 8943 if self._match_text_seq("COLUMNS", "(", advance=False): 8944 this = self._parse_function() 8945 if isinstance(this, exp.Columns): 8946 this.set("unpack", True) 8947 return this 8948 8949 return self.expression( 8950 exp.Star, 8951 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 8952 replace=self._parse_star_op("REPLACE"), 8953 rename=self._parse_star_op("RENAME"), 8954 ).update_positions(star_token) 8955 8956 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8957 privilege_parts = [] 8958 8959 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8960 # (end of privilege list) or L_PAREN (start of column list) are met 8961 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8962 privilege_parts.append(self._curr.text.upper()) 8963 self._advance() 8964 8965 this = exp.var(" ".join(privilege_parts)) 8966 expressions = ( 8967 self._parse_wrapped_csv(self._parse_column) 8968 if self._match(TokenType.L_PAREN, advance=False) 8969 else None 8970 ) 8971 8972 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8973 8974 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8975 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8976 principal = self._parse_id_var() 8977 8978 if not principal: 8979 return None 8980 8981 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8982 8983 def _parse_grant_revoke_common( 8984 self, 8985 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8986 privileges = self._parse_csv(self._parse_grant_privilege) 8987 8988 self._match(TokenType.ON) 8989 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 8990 8991 # Attempt to parse the securable e.g. 
MySQL allows names 8992 # such as "foo.*", "*.*" which are not easily parseable yet 8993 securable = self._try_parse(self._parse_table_parts) 8994 8995 return privileges, kind, securable 8996 8997 def _parse_grant(self) -> exp.Grant | exp.Command: 8998 start = self._prev 8999 9000 privileges, kind, securable = self._parse_grant_revoke_common() 9001 9002 if not securable or not self._match_text_seq("TO"): 9003 return self._parse_as_command(start) 9004 9005 principals = self._parse_csv(self._parse_grant_principal) 9006 9007 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9008 9009 if self._curr: 9010 return self._parse_as_command(start) 9011 9012 return self.expression( 9013 exp.Grant, 9014 privileges=privileges, 9015 kind=kind, 9016 securable=securable, 9017 principals=principals, 9018 grant_option=grant_option, 9019 ) 9020 9021 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9022 start = self._prev 9023 9024 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9025 9026 privileges, kind, securable = self._parse_grant_revoke_common() 9027 9028 if not securable or not self._match_text_seq("FROM"): 9029 return self._parse_as_command(start) 9030 9031 principals = self._parse_csv(self._parse_grant_principal) 9032 9033 cascade = None 9034 if self._match_texts(("CASCADE", "RESTRICT")): 9035 cascade = self._prev.text.upper() 9036 9037 if self._curr: 9038 return self._parse_as_command(start) 9039 9040 return self.expression( 9041 exp.Revoke, 9042 privileges=privileges, 9043 kind=kind, 9044 securable=securable, 9045 principals=principals, 9046 grant_option=grant_option, 9047 cascade=cascade, 9048 ) 9049 9050 def _parse_overlay(self) -> exp.Overlay: 9051 def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]: 9052 return ( 9053 self._parse_bitwise() 9054 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9055 else None 9056 ) 9057 9058 return self.expression( 9059 exp.Overlay, 9060 this=self._parse_bitwise(), 9061 
expression=_parse_overlay_arg("PLACING"), 9062 from_=_parse_overlay_arg("FROM"), 9063 for_=_parse_overlay_arg("FOR"), 9064 ) 9065 9066 def _parse_format_name(self) -> exp.Property: 9067 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9068 # for FILE_FORMAT = <format_name> 9069 return self.expression( 9070 exp.Property, 9071 this=exp.var("FORMAT_NAME"), 9072 value=self._parse_string() or self._parse_table_parts(), 9073 ) 9074 9075 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 9076 args: t.List[exp.Expression] = [] 9077 9078 if self._match(TokenType.DISTINCT): 9079 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 9080 self._match(TokenType.COMMA) 9081 9082 args.extend(self._parse_function_args()) 9083 9084 return self.expression( 9085 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 9086 ) 9087 9088 def _identifier_expression( 9089 self, token: t.Optional[Token] = None, **kwargs: t.Any 9090 ) -> exp.Identifier: 9091 return self.expression(exp.Identifier, token=token or self._prev, **kwargs) 9092 9093 def _build_pipe_cte( 9094 self, 9095 query: exp.Query, 9096 expressions: t.List[exp.Expression], 9097 alias_cte: t.Optional[exp.TableAlias] = None, 9098 ) -> exp.Select: 9099 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 9100 if alias_cte: 9101 new_cte = alias_cte 9102 else: 9103 self._pipe_cte_counter += 1 9104 new_cte = f"__tmp{self._pipe_cte_counter}" 9105 9106 with_ = query.args.get("with_") 9107 ctes = with_.pop() if with_ else None 9108 9109 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9110 if ctes: 9111 new_select.set("with_", ctes) 9112 9113 return new_select.with_(new_cte, as_=query, copy=False) 9114 9115 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9116 select = self._parse_select(consume_pipe=False) 9117 if not select: 9118 return query 9119 9120 return 
self._build_pipe_cte( 9121 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9122 ) 9123 9124 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9125 limit = self._parse_limit() 9126 offset = self._parse_offset() 9127 if limit: 9128 curr_limit = query.args.get("limit", limit) 9129 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9130 query.limit(limit, copy=False) 9131 if offset: 9132 curr_offset = query.args.get("offset") 9133 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9134 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9135 9136 return query 9137 9138 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 9139 this = self._parse_disjunction() 9140 if self._match_text_seq("GROUP", "AND", advance=False): 9141 return this 9142 9143 this = self._parse_alias(this) 9144 9145 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9146 return self._parse_ordered(lambda: this) 9147 9148 return this 9149 9150 def _parse_pipe_syntax_aggregate_group_order_by( 9151 self, query: exp.Select, group_by_exists: bool = True 9152 ) -> exp.Select: 9153 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9154 aggregates_or_groups, orders = [], [] 9155 for element in expr: 9156 if isinstance(element, exp.Ordered): 9157 this = element.this 9158 if isinstance(this, exp.Alias): 9159 element.set("this", this.args["alias"]) 9160 orders.append(element) 9161 else: 9162 this = element 9163 aggregates_or_groups.append(this) 9164 9165 if group_by_exists: 9166 query.select(*aggregates_or_groups, copy=False).group_by( 9167 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9168 copy=False, 9169 ) 9170 else: 9171 query.select(*aggregates_or_groups, append=False, copy=False) 9172 9173 if orders: 9174 return query.order_by(*orders, append=False, copy=False) 9175 9176 return query 9177 9178 def 
_parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9179 self._match_text_seq("AGGREGATE") 9180 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9181 9182 if self._match(TokenType.GROUP_BY) or ( 9183 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9184 ): 9185 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9186 9187 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9188 9189 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 9190 first_setop = self.parse_set_operation(this=query) 9191 if not first_setop: 9192 return None 9193 9194 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 9195 expr = self._parse_paren() 9196 return expr.assert_is(exp.Subquery).unnest() if expr else None 9197 9198 first_setop.this.pop() 9199 9200 setops = [ 9201 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9202 *self._parse_csv(_parse_and_unwrap_query), 9203 ] 9204 9205 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9206 with_ = query.args.get("with_") 9207 ctes = with_.pop() if with_ else None 9208 9209 if isinstance(first_setop, exp.Union): 9210 query = query.union(*setops, copy=False, **first_setop.args) 9211 elif isinstance(first_setop, exp.Except): 9212 query = query.except_(*setops, copy=False, **first_setop.args) 9213 else: 9214 query = query.intersect(*setops, copy=False, **first_setop.args) 9215 9216 query.set("with_", ctes) 9217 9218 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9219 9220 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 9221 join = self._parse_join() 9222 if not join: 9223 return None 9224 9225 if isinstance(query, exp.Select): 9226 return query.join(join, copy=False) 9227 9228 return query 9229 9230 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9231 pivots = self._parse_pivots() 9232 if not pivots: 
9233 return query 9234 9235 from_ = query.args.get("from_") 9236 if from_: 9237 from_.this.set("pivots", pivots) 9238 9239 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9240 9241 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9242 self._match_text_seq("EXTEND") 9243 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9244 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9245 9246 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9247 sample = self._parse_table_sample() 9248 9249 with_ = query.args.get("with_") 9250 if with_: 9251 with_.expressions[-1].this.set("sample", sample) 9252 else: 9253 query.set("sample", sample) 9254 9255 return query 9256 9257 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 9258 if isinstance(query, exp.Subquery): 9259 query = exp.select("*").from_(query, copy=False) 9260 9261 if not query.args.get("from_"): 9262 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9263 9264 while self._match(TokenType.PIPE_GT): 9265 start = self._curr 9266 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 9267 if not parser: 9268 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9269 # keywords, making it tricky to disambiguate them without lookahead. The approach 9270 # here is to try and parse a set operation and if that fails, then try to parse a 9271 # join operator. If that fails as well, then the operator is not supported. 
9272 parsed_query = self._parse_pipe_syntax_set_operator(query) 9273 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9274 if not parsed_query: 9275 self._retreat(start) 9276 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 9277 break 9278 query = parsed_query 9279 else: 9280 query = parser(self, query) 9281 9282 return query 9283 9284 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 9285 self._match_texts(("VAR", "VARIABLE")) 9286 9287 vars = self._parse_csv(self._parse_id_var) 9288 if not vars: 9289 return None 9290 9291 self._match(TokenType.ALIAS) 9292 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9293 default = ( 9294 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9295 ) and self._parse_bitwise() 9296 9297 return self.expression(exp.DeclareItem, this=vars, kind=kind, default=default) 9298 9299 def _parse_declare(self) -> exp.Declare | exp.Command: 9300 start = self._prev 9301 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9302 9303 if not expressions or self._curr: 9304 return self._parse_as_command(start) 9305 9306 return self.expression(exp.Declare, expressions=expressions) 9307 9308 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9309 exp_class = exp.Cast if strict else exp.TryCast 9310 9311 if exp_class == exp.TryCast: 9312 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9313 9314 return self.expression(exp_class, **kwargs) 9315 9316 def _parse_json_value(self) -> exp.JSONValue: 9317 this = self._parse_bitwise() 9318 self._match(TokenType.COMMA) 9319 path = self._parse_bitwise() 9320 9321 returning = self._match(TokenType.RETURNING) and self._parse_type() 9322 9323 return self.expression( 9324 exp.JSONValue, 9325 this=this, 9326 path=self.dialect.to_json_path(path), 9327 returning=returning, 9328 on_condition=self._parse_on_condition(), 9329 ) 9330 9331 def _parse_group_concat(self) -> 
t.Optional[exp.Expression]: 9332 def concat_exprs( 9333 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 9334 ) -> exp.Expression: 9335 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9336 concat_exprs = [ 9337 self.expression( 9338 exp.Concat, 9339 expressions=node.expressions, 9340 safe=True, 9341 coalesce=self.dialect.CONCAT_COALESCE, 9342 ) 9343 ] 9344 node.set("expressions", concat_exprs) 9345 return node 9346 if len(exprs) == 1: 9347 return exprs[0] 9348 return self.expression( 9349 exp.Concat, expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE 9350 ) 9351 9352 args = self._parse_csv(self._parse_lambda) 9353 9354 if args: 9355 order = args[-1] if isinstance(args[-1], exp.Order) else None 9356 9357 if order: 9358 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9359 # remove 'expr' from exp.Order and add it back to args 9360 args[-1] = order.this 9361 order.set("this", concat_exprs(order.this, args)) 9362 9363 this = order or concat_exprs(args[0], args) 9364 else: 9365 this = None 9366 9367 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9368 9369 return self.expression(exp.GroupConcat, this=this, separator=separator) 9370 9371 def _parse_initcap(self) -> exp.Initcap: 9372 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9373 9374 # attach dialect's default delimiters 9375 if expr.args.get("expression") is None: 9376 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9377 9378 return expr 9379 9380 def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 9381 while True: 9382 if not self._match(TokenType.L_PAREN): 9383 break 9384 9385 op = "" 9386 while self._curr and not self._match(TokenType.R_PAREN): 9387 op += self._curr.text 9388 self._advance() 9389 9390 this = self.expression( 9391 exp.Operator, 9392 comments=self._prev_comments, 9393 this=this, 
9394 operator=op, 9395 expression=self._parse_bitwise(), 9396 ) 9397 9398 if not self._match(TokenType.OPERATOR): 9399 break 9400 9401 return this
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap from alternating key/value args."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    # args alternates key, value, key, value, ...
    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a parser method that builds `expr_type` from `this` and the next bitwise expression,
    optionally with the operands swapped, then applies any ESCAPE clause."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        lhs, rhs = this, self._parse_bitwise()
        if reverse_args:
            lhs, rhs = rhs, lhs

        node = self.expression(expr_type, this=lhs, expression=rhs)
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build LOG from its args, honoring the dialect's base-argument order; a single
    argument yields LN or LOG depending on the dialect's default."""
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Single-argument form: Default argument order is base, expression
    if not expression:
        return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)

    if not dialect.LOG_BASE_FIRST:
        this, expression = expression, this

    return exp.Log(this=this, expression=expression)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that produces `expr_type` with argument 1 converted to the
    dialect's JSON path representation."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        if expr_type is exp.JSONExtract and len(args) > 2:
            # Extra arguments beyond the path are only supported for JSONExtract
            node.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build MOD(a, b), parenthesizing binary operands so precedence survives
    rendering as `a % b` (e.g. MOD(a + 1, 7) -> (a + 1) % 7)."""

    def _wrap(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(this=_wrap(seq_get(args, 0)), expression=_wrap(seq_get(args, 1)))
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node, recording bracket vs. ARRAY(...) notation for
    dialects that distinguish the two constructors."""
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        used_brackets = bracket_kind == TokenType.L_BRACKET
        node.set("bracket_notation", used_brackets)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build CONVERT_TIMEZONE; with exactly two args they are (target_tz, timestamp) and
    the source timezone falls back to `default_source_tz` when provided."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
def build_trim(args: t.List, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """Build a LEADING (is_left) or TRAILING Trim from (target, trim_chars) args,
    optionally with the two arguments reversed."""
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if expression and reverse_args:
        this, expression = expression, this

    return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")
def build_array_append(args: t.List, dialect: Dialect) -> exp.ArrayAppend:
    """Build ArrayAppend, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is
    NULL, while others (DuckDB, PostgreSQL) create a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayAppend expression with the appropriate null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    return exp.ArrayAppend(
        this=array,
        expression=element,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayAppend expression with appropriate null_propagation flag
def build_array_prepend(args: t.List, dialect: Dialect) -> exp.ArrayPrepend:
    """Build ArrayPrepend, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is
    NULL, while others (DuckDB, PostgreSQL) create a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayPrepend expression with the appropriate null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    return exp.ArrayPrepend(
        this=array,
        expression=element,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayPrepend expression with appropriate null_propagation flag
def build_array_concat(args: t.List, dialect: Dialect) -> exp.ArrayConcat:
    """Build ArrayConcat, tagging it with the dialect's NULL-propagation behavior.

    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL,
    while others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenating.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

    Returns:
        ArrayConcat expression with the appropriate null_propagation flag
    """
    head = seq_get(args, 0)
    tail = args[1:]
    return exp.ArrayConcat(
        this=head,
        expressions=tail,
        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
    )
Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
Arguments:
- args: Function arguments [array1, array2, ...] (variadic)
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayConcat expression with appropriate null_propagation flag
240def build_array_remove(args: t.List, dialect: Dialect) -> exp.ArrayRemove: 241 """ 242 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 243 244 Some dialects (Snowflake) return NULL when the removal value is NULL. 245 Others (DuckDB) may return empty array due to NULL comparison semantics. 246 247 Args: 248 args: Function arguments [array, value_to_remove] 249 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 250 251 Returns: 252 ArrayRemove expression with appropriate null_propagation flag 253 """ 254 return exp.ArrayRemove( 255 this=seq_get(args, 0), 256 expression=seq_get(args, 1), 257 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 258 )
Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return empty array due to NULL comparison semantics.
Arguments:
- args: Function arguments [array, value_to_remove]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayRemove expression with appropriate null_propagation flag
271class Parser(metaclass=_Parser): 272 """ 273 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 274 275 Args: 276 error_level: The desired error level. 277 Default: ErrorLevel.IMMEDIATE 278 error_message_context: The amount of context to capture from a query string when displaying 279 the error message (in number of characters). 280 Default: 100 281 max_errors: Maximum number of error messages to include in a raised ParseError. 282 This is only relevant if error_level is ErrorLevel.RAISE. 283 Default: 3 284 """ 285 286 FUNCTIONS: t.Dict[str, t.Callable] = { 287 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 288 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 289 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 290 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 291 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 292 ), 293 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 294 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 295 ), 296 "ARRAY_APPEND": build_array_append, 297 "ARRAY_CAT": build_array_concat, 298 "ARRAY_CONCAT": build_array_concat, 299 "ARRAY_PREPEND": build_array_prepend, 300 "ARRAY_REMOVE": build_array_remove, 301 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 302 "CONCAT": lambda args, dialect: exp.Concat( 303 expressions=args, 304 safe=not dialect.STRICT_STRING_CONCAT, 305 coalesce=dialect.CONCAT_COALESCE, 306 ), 307 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 308 expressions=args, 309 safe=not dialect.STRICT_STRING_CONCAT, 310 coalesce=dialect.CONCAT_COALESCE, 311 ), 312 "CONVERT_TIMEZONE": build_convert_timezone, 313 "DATE_TO_DATE_STR": lambda args: exp.Cast( 314 this=seq_get(args, 0), 315 to=exp.DataType(this=exp.DataType.Type.TEXT), 316 ), 317 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 318 
start=seq_get(args, 0), 319 end=seq_get(args, 1), 320 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 321 ), 322 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 323 is_string=dialect.UUID_IS_STRING_TYPE or None 324 ), 325 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 326 "GREATEST": lambda args, dialect: exp.Greatest( 327 this=seq_get(args, 0), 328 expressions=args[1:], 329 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 330 ), 331 "LEAST": lambda args, dialect: exp.Least( 332 this=seq_get(args, 0), 333 expressions=args[1:], 334 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 335 ), 336 "HEX": build_hex, 337 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 338 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 339 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 340 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 341 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 342 ), 343 "LIKE": build_like, 344 "LOG": build_logarithm, 345 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 346 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 347 "LOWER": build_lower, 348 "LPAD": lambda args: build_pad(args), 349 "LEFTPAD": lambda args: build_pad(args), 350 "LTRIM": lambda args: build_trim(args), 351 "MOD": build_mod, 352 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 353 "RPAD": lambda args: build_pad(args, is_left=False), 354 "RTRIM": lambda args: build_trim(args, is_left=False), 355 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 356 if len(args) != 2 357 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 358 "STRPOS": exp.StrPosition.from_arg_list, 359 "CHARINDEX": lambda args: build_locate_strposition(args), 360 "INSTR": exp.StrPosition.from_arg_list, 361 
"LOCATE": lambda args: build_locate_strposition(args), 362 "TIME_TO_TIME_STR": lambda args: exp.Cast( 363 this=seq_get(args, 0), 364 to=exp.DataType(this=exp.DataType.Type.TEXT), 365 ), 366 "TO_HEX": build_hex, 367 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 368 this=exp.Cast( 369 this=seq_get(args, 0), 370 to=exp.DataType(this=exp.DataType.Type.TEXT), 371 ), 372 start=exp.Literal.number(1), 373 length=exp.Literal.number(10), 374 ), 375 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 376 "UPPER": build_upper, 377 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 378 "VAR_MAP": build_var_map, 379 } 380 381 NO_PAREN_FUNCTIONS = { 382 TokenType.CURRENT_DATE: exp.CurrentDate, 383 TokenType.CURRENT_DATETIME: exp.CurrentDate, 384 TokenType.CURRENT_TIME: exp.CurrentTime, 385 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 386 TokenType.CURRENT_USER: exp.CurrentUser, 387 TokenType.LOCALTIME: exp.Localtime, 388 TokenType.LOCALTIMESTAMP: exp.Localtimestamp, 389 TokenType.CURRENT_ROLE: exp.CurrentRole, 390 } 391 392 STRUCT_TYPE_TOKENS = { 393 TokenType.FILE, 394 TokenType.NESTED, 395 TokenType.OBJECT, 396 TokenType.STRUCT, 397 TokenType.UNION, 398 } 399 400 NESTED_TYPE_TOKENS = { 401 TokenType.ARRAY, 402 TokenType.LIST, 403 TokenType.LOWCARDINALITY, 404 TokenType.MAP, 405 TokenType.NULLABLE, 406 TokenType.RANGE, 407 *STRUCT_TYPE_TOKENS, 408 } 409 410 ENUM_TYPE_TOKENS = { 411 TokenType.DYNAMIC, 412 TokenType.ENUM, 413 TokenType.ENUM8, 414 TokenType.ENUM16, 415 } 416 417 AGGREGATE_TYPE_TOKENS = { 418 TokenType.AGGREGATEFUNCTION, 419 TokenType.SIMPLEAGGREGATEFUNCTION, 420 } 421 422 TYPE_TOKENS = { 423 TokenType.BIT, 424 TokenType.BOOLEAN, 425 TokenType.TINYINT, 426 TokenType.UTINYINT, 427 TokenType.SMALLINT, 428 TokenType.USMALLINT, 429 TokenType.INT, 430 TokenType.UINT, 431 TokenType.BIGINT, 432 TokenType.UBIGINT, 433 TokenType.BIGNUM, 434 TokenType.INT128, 435 TokenType.UINT128, 436 TokenType.INT256, 
437 TokenType.UINT256, 438 TokenType.MEDIUMINT, 439 TokenType.UMEDIUMINT, 440 TokenType.FIXEDSTRING, 441 TokenType.FLOAT, 442 TokenType.DOUBLE, 443 TokenType.UDOUBLE, 444 TokenType.CHAR, 445 TokenType.NCHAR, 446 TokenType.VARCHAR, 447 TokenType.NVARCHAR, 448 TokenType.BPCHAR, 449 TokenType.TEXT, 450 TokenType.MEDIUMTEXT, 451 TokenType.LONGTEXT, 452 TokenType.BLOB, 453 TokenType.MEDIUMBLOB, 454 TokenType.LONGBLOB, 455 TokenType.BINARY, 456 TokenType.VARBINARY, 457 TokenType.JSON, 458 TokenType.JSONB, 459 TokenType.INTERVAL, 460 TokenType.TINYBLOB, 461 TokenType.TINYTEXT, 462 TokenType.TIME, 463 TokenType.TIMETZ, 464 TokenType.TIME_NS, 465 TokenType.TIMESTAMP, 466 TokenType.TIMESTAMP_S, 467 TokenType.TIMESTAMP_MS, 468 TokenType.TIMESTAMP_NS, 469 TokenType.TIMESTAMPTZ, 470 TokenType.TIMESTAMPLTZ, 471 TokenType.TIMESTAMPNTZ, 472 TokenType.DATETIME, 473 TokenType.DATETIME2, 474 TokenType.DATETIME64, 475 TokenType.SMALLDATETIME, 476 TokenType.DATE, 477 TokenType.DATE32, 478 TokenType.INT4RANGE, 479 TokenType.INT4MULTIRANGE, 480 TokenType.INT8RANGE, 481 TokenType.INT8MULTIRANGE, 482 TokenType.NUMRANGE, 483 TokenType.NUMMULTIRANGE, 484 TokenType.TSRANGE, 485 TokenType.TSMULTIRANGE, 486 TokenType.TSTZRANGE, 487 TokenType.TSTZMULTIRANGE, 488 TokenType.DATERANGE, 489 TokenType.DATEMULTIRANGE, 490 TokenType.DECIMAL, 491 TokenType.DECIMAL32, 492 TokenType.DECIMAL64, 493 TokenType.DECIMAL128, 494 TokenType.DECIMAL256, 495 TokenType.DECFLOAT, 496 TokenType.UDECIMAL, 497 TokenType.BIGDECIMAL, 498 TokenType.UUID, 499 TokenType.GEOGRAPHY, 500 TokenType.GEOGRAPHYPOINT, 501 TokenType.GEOMETRY, 502 TokenType.POINT, 503 TokenType.RING, 504 TokenType.LINESTRING, 505 TokenType.MULTILINESTRING, 506 TokenType.POLYGON, 507 TokenType.MULTIPOLYGON, 508 TokenType.HLLSKETCH, 509 TokenType.HSTORE, 510 TokenType.PSEUDO_TYPE, 511 TokenType.SUPER, 512 TokenType.SERIAL, 513 TokenType.SMALLSERIAL, 514 TokenType.BIGSERIAL, 515 TokenType.XML, 516 TokenType.YEAR, 517 TokenType.USERDEFINED, 518 
TokenType.MONEY, 519 TokenType.SMALLMONEY, 520 TokenType.ROWVERSION, 521 TokenType.IMAGE, 522 TokenType.VARIANT, 523 TokenType.VECTOR, 524 TokenType.VOID, 525 TokenType.OBJECT, 526 TokenType.OBJECT_IDENTIFIER, 527 TokenType.INET, 528 TokenType.IPADDRESS, 529 TokenType.IPPREFIX, 530 TokenType.IPV4, 531 TokenType.IPV6, 532 TokenType.UNKNOWN, 533 TokenType.NOTHING, 534 TokenType.NULL, 535 TokenType.NAME, 536 TokenType.TDIGEST, 537 TokenType.DYNAMIC, 538 *ENUM_TYPE_TOKENS, 539 *NESTED_TYPE_TOKENS, 540 *AGGREGATE_TYPE_TOKENS, 541 } 542 543 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 544 TokenType.BIGINT: TokenType.UBIGINT, 545 TokenType.INT: TokenType.UINT, 546 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 547 TokenType.SMALLINT: TokenType.USMALLINT, 548 TokenType.TINYINT: TokenType.UTINYINT, 549 TokenType.DECIMAL: TokenType.UDECIMAL, 550 TokenType.DOUBLE: TokenType.UDOUBLE, 551 } 552 553 SUBQUERY_PREDICATES = { 554 TokenType.ANY: exp.Any, 555 TokenType.ALL: exp.All, 556 TokenType.EXISTS: exp.Exists, 557 TokenType.SOME: exp.Any, 558 } 559 560 RESERVED_TOKENS = { 561 *Tokenizer.SINGLE_TOKENS.values(), 562 TokenType.SELECT, 563 } - {TokenType.IDENTIFIER} 564 565 DB_CREATABLES = { 566 TokenType.DATABASE, 567 TokenType.DICTIONARY, 568 TokenType.FILE_FORMAT, 569 TokenType.MODEL, 570 TokenType.NAMESPACE, 571 TokenType.SCHEMA, 572 TokenType.SEMANTIC_VIEW, 573 TokenType.SEQUENCE, 574 TokenType.SINK, 575 TokenType.SOURCE, 576 TokenType.STAGE, 577 TokenType.STORAGE_INTEGRATION, 578 TokenType.STREAMLIT, 579 TokenType.TABLE, 580 TokenType.TAG, 581 TokenType.VIEW, 582 TokenType.WAREHOUSE, 583 } 584 585 CREATABLES = { 586 TokenType.COLUMN, 587 TokenType.CONSTRAINT, 588 TokenType.FOREIGN_KEY, 589 TokenType.FUNCTION, 590 TokenType.INDEX, 591 TokenType.PROCEDURE, 592 TokenType.TRIGGER, 593 *DB_CREATABLES, 594 } 595 596 TRIGGER_EVENTS = {TokenType.INSERT, TokenType.UPDATE, TokenType.DELETE, TokenType.TRUNCATE} 597 598 ALTERABLES = { 599 TokenType.INDEX, 600 TokenType.TABLE, 601 TokenType.VIEW, 602 
TokenType.SESSION, 603 } 604 605 # Tokens that can represent identifiers 606 ID_VAR_TOKENS = { 607 TokenType.ALL, 608 TokenType.ANALYZE, 609 TokenType.ATTACH, 610 TokenType.VAR, 611 TokenType.ANTI, 612 TokenType.APPLY, 613 TokenType.ASC, 614 TokenType.ASOF, 615 TokenType.AUTO_INCREMENT, 616 TokenType.BEGIN, 617 TokenType.BPCHAR, 618 TokenType.CACHE, 619 TokenType.CASE, 620 TokenType.COLLATE, 621 TokenType.COMMAND, 622 TokenType.COMMENT, 623 TokenType.COMMIT, 624 TokenType.CONSTRAINT, 625 TokenType.COPY, 626 TokenType.CUBE, 627 TokenType.CURRENT_SCHEMA, 628 TokenType.DEFAULT, 629 TokenType.DELETE, 630 TokenType.DESC, 631 TokenType.DESCRIBE, 632 TokenType.DETACH, 633 TokenType.DICTIONARY, 634 TokenType.DIV, 635 TokenType.END, 636 TokenType.EXECUTE, 637 TokenType.EXPORT, 638 TokenType.ESCAPE, 639 TokenType.FALSE, 640 TokenType.FIRST, 641 TokenType.FILTER, 642 TokenType.FINAL, 643 TokenType.FORMAT, 644 TokenType.FULL, 645 TokenType.GET, 646 TokenType.IDENTIFIER, 647 TokenType.INOUT, 648 TokenType.IS, 649 TokenType.ISNULL, 650 TokenType.INTERVAL, 651 TokenType.KEEP, 652 TokenType.KILL, 653 TokenType.LEFT, 654 TokenType.LIMIT, 655 TokenType.LOAD, 656 TokenType.LOCK, 657 TokenType.MATCH, 658 TokenType.MERGE, 659 TokenType.NATURAL, 660 TokenType.NEXT, 661 TokenType.OFFSET, 662 TokenType.OPERATOR, 663 TokenType.ORDINALITY, 664 TokenType.OVER, 665 TokenType.OVERLAPS, 666 TokenType.OVERWRITE, 667 TokenType.PARTITION, 668 TokenType.PERCENT, 669 TokenType.PIVOT, 670 TokenType.PRAGMA, 671 TokenType.PUT, 672 TokenType.RANGE, 673 TokenType.RECURSIVE, 674 TokenType.REFERENCES, 675 TokenType.REFRESH, 676 TokenType.RENAME, 677 TokenType.REPLACE, 678 TokenType.RIGHT, 679 TokenType.ROLLUP, 680 TokenType.ROW, 681 TokenType.ROWS, 682 TokenType.SEMI, 683 TokenType.SET, 684 TokenType.SETTINGS, 685 TokenType.SHOW, 686 TokenType.TEMPORARY, 687 TokenType.TOP, 688 TokenType.TRUE, 689 TokenType.TRUNCATE, 690 TokenType.UNIQUE, 691 TokenType.UNNEST, 692 TokenType.UNPIVOT, 693 TokenType.UPDATE, 
694 TokenType.USE, 695 TokenType.VOLATILE, 696 TokenType.WINDOW, 697 *ALTERABLES, 698 *CREATABLES, 699 *SUBQUERY_PREDICATES, 700 *TYPE_TOKENS, 701 *NO_PAREN_FUNCTIONS, 702 } 703 ID_VAR_TOKENS.remove(TokenType.UNION) 704 705 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 706 TokenType.ANTI, 707 TokenType.ASOF, 708 TokenType.FULL, 709 TokenType.LEFT, 710 TokenType.LOCK, 711 TokenType.NATURAL, 712 TokenType.RIGHT, 713 TokenType.SEMI, 714 TokenType.WINDOW, 715 } 716 717 ALIAS_TOKENS = ID_VAR_TOKENS 718 719 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 720 721 ARRAY_CONSTRUCTORS = { 722 "ARRAY": exp.Array, 723 "LIST": exp.List, 724 } 725 726 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 727 728 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 729 730 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 731 732 FUNC_TOKENS = { 733 TokenType.COLLATE, 734 TokenType.COMMAND, 735 TokenType.CURRENT_DATE, 736 TokenType.CURRENT_DATETIME, 737 TokenType.CURRENT_SCHEMA, 738 TokenType.CURRENT_TIMESTAMP, 739 TokenType.CURRENT_TIME, 740 TokenType.CURRENT_USER, 741 TokenType.CURRENT_CATALOG, 742 TokenType.FILTER, 743 TokenType.FIRST, 744 TokenType.FORMAT, 745 TokenType.GET, 746 TokenType.GLOB, 747 TokenType.IDENTIFIER, 748 TokenType.INDEX, 749 TokenType.ISNULL, 750 TokenType.ILIKE, 751 TokenType.INSERT, 752 TokenType.LIKE, 753 TokenType.LOCALTIME, 754 TokenType.LOCALTIMESTAMP, 755 TokenType.MERGE, 756 TokenType.NEXT, 757 TokenType.OFFSET, 758 TokenType.PRIMARY_KEY, 759 TokenType.RANGE, 760 TokenType.REPLACE, 761 TokenType.RLIKE, 762 TokenType.ROW, 763 TokenType.SESSION_USER, 764 TokenType.UNNEST, 765 TokenType.VAR, 766 TokenType.LEFT, 767 TokenType.RIGHT, 768 TokenType.SEQUENCE, 769 TokenType.DATE, 770 TokenType.DATETIME, 771 TokenType.TABLE, 772 TokenType.TIMESTAMP, 773 TokenType.TIMESTAMPTZ, 774 TokenType.TRUNCATE, 775 TokenType.UTC_DATE, 776 TokenType.UTC_TIME, 777 TokenType.UTC_TIMESTAMP, 778 TokenType.WINDOW, 779 TokenType.XOR, 780 *TYPE_TOKENS, 781 *SUBQUERY_PREDICATES, 
782 } 783 784 CONJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expression]]] = { 785 TokenType.AND: exp.And, 786 } 787 788 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 789 TokenType.COLON_EQ: exp.PropertyEQ, 790 } 791 792 DISJUNCTION: t.ClassVar[t.Dict[TokenType, t.Type[exp.Expression]]] = { 793 TokenType.OR: exp.Or, 794 } 795 796 EQUALITY = { 797 TokenType.EQ: exp.EQ, 798 TokenType.NEQ: exp.NEQ, 799 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 800 } 801 802 COMPARISON = { 803 TokenType.GT: exp.GT, 804 TokenType.GTE: exp.GTE, 805 TokenType.LT: exp.LT, 806 TokenType.LTE: exp.LTE, 807 } 808 809 BITWISE = { 810 TokenType.AMP: exp.BitwiseAnd, 811 TokenType.CARET: exp.BitwiseXor, 812 TokenType.PIPE: exp.BitwiseOr, 813 } 814 815 TERM = { 816 TokenType.DASH: exp.Sub, 817 TokenType.PLUS: exp.Add, 818 TokenType.MOD: exp.Mod, 819 TokenType.COLLATE: exp.Collate, 820 } 821 822 FACTOR = { 823 TokenType.DIV: exp.IntDiv, 824 TokenType.LR_ARROW: exp.Distance, 825 TokenType.SLASH: exp.Div, 826 TokenType.STAR: exp.Mul, 827 } 828 829 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 830 831 TIMES = { 832 TokenType.TIME, 833 TokenType.TIMETZ, 834 } 835 836 TIMESTAMPS = { 837 TokenType.TIMESTAMP, 838 TokenType.TIMESTAMPNTZ, 839 TokenType.TIMESTAMPTZ, 840 TokenType.TIMESTAMPLTZ, 841 *TIMES, 842 } 843 844 SET_OPERATIONS = { 845 TokenType.UNION, 846 TokenType.INTERSECT, 847 TokenType.EXCEPT, 848 } 849 850 JOIN_METHODS = { 851 TokenType.ASOF, 852 TokenType.NATURAL, 853 TokenType.POSITIONAL, 854 } 855 856 JOIN_SIDES = { 857 TokenType.LEFT, 858 TokenType.RIGHT, 859 TokenType.FULL, 860 } 861 862 JOIN_KINDS = { 863 TokenType.ANTI, 864 TokenType.CROSS, 865 TokenType.INNER, 866 TokenType.OUTER, 867 TokenType.SEMI, 868 TokenType.STRAIGHT_JOIN, 869 } 870 871 JOIN_HINTS: t.Set[str] = set() 872 873 LAMBDAS = { 874 TokenType.ARROW: lambda self, expressions: self.expression( 875 exp.Lambda, 876 this=self._replace_lambda( 877 self._parse_disjunction(), 878 expressions, 879 ), 880 
expressions=expressions, 881 ), 882 TokenType.FARROW: lambda self, expressions: self.expression( 883 exp.Kwarg, 884 this=exp.var(expressions[0].name), 885 expression=self._parse_disjunction(), 886 ), 887 } 888 889 COLUMN_OPERATORS = { 890 TokenType.DOT: None, 891 TokenType.DOTCOLON: lambda self, this, to: self.expression( 892 exp.JSONCast, 893 this=this, 894 to=to, 895 ), 896 TokenType.DCOLON: lambda self, this, to: self.build_cast( 897 strict=self.STRICT_CAST, this=this, to=to 898 ), 899 TokenType.ARROW: lambda self, this, path: self.expression( 900 exp.JSONExtract, 901 this=this, 902 expression=self.dialect.to_json_path(path), 903 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 904 ), 905 TokenType.DARROW: lambda self, this, path: self.expression( 906 exp.JSONExtractScalar, 907 this=this, 908 expression=self.dialect.to_json_path(path), 909 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 910 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 911 ), 912 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 913 exp.JSONBExtract, 914 this=this, 915 expression=path, 916 ), 917 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 918 exp.JSONBExtractScalar, 919 this=this, 920 expression=path, 921 ), 922 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 923 exp.JSONBContains, 924 this=this, 925 expression=key, 926 ), 927 } 928 929 CAST_COLUMN_OPERATORS = { 930 TokenType.DOTCOLON, 931 TokenType.DCOLON, 932 } 933 934 EXPRESSION_PARSERS = { 935 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 936 exp.Column: lambda self: self._parse_column(), 937 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 938 exp.Condition: lambda self: self._parse_disjunction(), 939 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 940 exp.Expression: lambda self: self._parse_expression(), 941 exp.From: lambda self: self._parse_from(joins=True), 942 
exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 943 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 944 exp.Group: lambda self: self._parse_group(), 945 exp.Having: lambda self: self._parse_having(), 946 exp.Hint: lambda self: self._parse_hint_body(), 947 exp.Identifier: lambda self: self._parse_id_var(), 948 exp.Join: lambda self: self._parse_join(), 949 exp.Lambda: lambda self: self._parse_lambda(), 950 exp.Lateral: lambda self: self._parse_lateral(), 951 exp.Limit: lambda self: self._parse_limit(), 952 exp.Offset: lambda self: self._parse_offset(), 953 exp.Order: lambda self: self._parse_order(), 954 exp.Ordered: lambda self: self._parse_ordered(), 955 exp.Properties: lambda self: self._parse_properties(), 956 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 957 exp.Qualify: lambda self: self._parse_qualify(), 958 exp.Returning: lambda self: self._parse_returning(), 959 exp.Select: lambda self: self._parse_select(), 960 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 961 exp.Table: lambda self: self._parse_table_parts(), 962 exp.TableAlias: lambda self: self._parse_table_alias(), 963 exp.Tuple: lambda self: self._parse_value(values=False), 964 exp.Whens: lambda self: self._parse_when_matched(), 965 exp.Where: lambda self: self._parse_where(), 966 exp.Window: lambda self: self._parse_named_window(), 967 exp.With: lambda self: self._parse_with(), 968 "JOIN_TYPE": lambda self: self._parse_join_parts(), 969 } 970 971 STATEMENT_PARSERS = { 972 TokenType.ALTER: lambda self: self._parse_alter(), 973 TokenType.ANALYZE: lambda self: self._parse_analyze(), 974 TokenType.BEGIN: lambda self: self._parse_transaction(), 975 TokenType.CACHE: lambda self: self._parse_cache(), 976 TokenType.COMMENT: lambda self: self._parse_comment(), 977 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 978 TokenType.COPY: lambda self: self._parse_copy(), 979 TokenType.CREATE: lambda self: 
self._parse_create(), 980 TokenType.DELETE: lambda self: self._parse_delete(), 981 TokenType.DESC: lambda self: self._parse_describe(), 982 TokenType.DESCRIBE: lambda self: self._parse_describe(), 983 TokenType.DROP: lambda self: self._parse_drop(), 984 TokenType.GRANT: lambda self: self._parse_grant(), 985 TokenType.REVOKE: lambda self: self._parse_revoke(), 986 TokenType.INSERT: lambda self: self._parse_insert(), 987 TokenType.KILL: lambda self: self._parse_kill(), 988 TokenType.LOAD: lambda self: self._parse_load(), 989 TokenType.MERGE: lambda self: self._parse_merge(), 990 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 991 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 992 TokenType.REFRESH: lambda self: self._parse_refresh(), 993 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 994 TokenType.SET: lambda self: self._parse_set(), 995 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 996 TokenType.UNCACHE: lambda self: self._parse_uncache(), 997 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 998 TokenType.UPDATE: lambda self: self._parse_update(), 999 TokenType.USE: lambda self: self._parse_use(), 1000 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 1001 } 1002 1003 UNARY_PARSERS = { 1004 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1005 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 1006 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 1007 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 1008 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 1009 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 1010 } 1011 1012 STRING_PARSERS = { 1013 TokenType.HEREDOC_STRING: lambda self, token: self.expression(exp.RawString, 
token=token), 1014 TokenType.NATIONAL_STRING: lambda self, token: self.expression(exp.National, token=token), 1015 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, token=token), 1016 TokenType.STRING: lambda self, token: self.expression( 1017 exp.Literal, token=token, is_string=True 1018 ), 1019 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1020 exp.UnicodeString, 1021 token=token, 1022 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 1023 ), 1024 } 1025 1026 NUMERIC_PARSERS = { 1027 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, token=token), 1028 TokenType.BYTE_STRING: lambda self, token: self.expression( 1029 exp.ByteString, 1030 token=token, 1031 is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None, 1032 ), 1033 TokenType.HEX_STRING: lambda self, token: self.expression( 1034 exp.HexString, 1035 token=token, 1036 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 1037 ), 1038 TokenType.NUMBER: lambda self, token: self.expression( 1039 exp.Literal, token=token, is_string=False 1040 ), 1041 } 1042 1043 PRIMARY_PARSERS = { 1044 **STRING_PARSERS, 1045 **NUMERIC_PARSERS, 1046 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1047 TokenType.NULL: lambda self, _: self.expression(exp.Null), 1048 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 1049 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 1050 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1051 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1052 } 1053 1054 PLACEHOLDER_PARSERS = { 1055 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 1056 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1057 TokenType.COLON: lambda self: ( 1058 self.expression(exp.Placeholder, this=self._prev.text) 1059 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1060 else None 1061 ), 1062 } 1063 1064 
RANGE_PARSERS = { 1065 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1066 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1067 TokenType.GLOB: binary_range_parser(exp.Glob), 1068 TokenType.ILIKE: binary_range_parser(exp.ILike), 1069 TokenType.IN: lambda self, this: self._parse_in(this), 1070 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1071 TokenType.IS: lambda self, this: self._parse_is(this), 1072 TokenType.LIKE: binary_range_parser(exp.Like), 1073 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1074 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1075 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1076 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 1077 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 1078 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1079 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1080 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 1081 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1082 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1083 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1084 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1085 } 1086 1087 PIPE_SYNTAX_TRANSFORM_PARSERS = { 1088 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1089 "AS": lambda self, query: self._build_pipe_cte( 1090 query, [exp.Star()], self._parse_table_alias() 1091 ), 1092 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1093 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1094 "ORDER BY": lambda self, query: query.order_by( 1095 self._parse_order(), append=False, copy=False 1096 ), 1097 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1098 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1099 "TABLESAMPLE": lambda self, query: 
self._parse_pipe_syntax_tablesample(query), 1100 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1101 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1102 } 1103 1104 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 1105 "ALLOWED_VALUES": lambda self: self.expression( 1106 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 1107 ), 1108 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1109 "AUTO": lambda self: self._parse_auto_property(), 1110 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 1111 "BACKUP": lambda self: self.expression( 1112 exp.BackupProperty, this=self._parse_var(any_token=True) 1113 ), 1114 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1115 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1116 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1117 "CHECKSUM": lambda self: self._parse_checksum(), 1118 "CLUSTER BY": lambda self: self._parse_cluster(), 1119 "CLUSTERED": lambda self: self._parse_clustered_by(), 1120 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1121 exp.CollateProperty, **kwargs 1122 ), 1123 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1124 "CONTAINS": lambda self: self._parse_contains_property(), 1125 "COPY": lambda self: self._parse_copy_property(), 1126 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1127 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1128 "DEFINER": lambda self: self._parse_definer(), 1129 "DETERMINISTIC": lambda self: self.expression( 1130 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1131 ), 1132 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1133 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1134 "DYNAMIC": lambda self: 
self.expression(exp.DynamicProperty), 1135 "DISTKEY": lambda self: self._parse_distkey(), 1136 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1137 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 1138 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1139 "ENVIRONMENT": lambda self: self.expression( 1140 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1141 ), 1142 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1143 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1144 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1145 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1146 "FREESPACE": lambda self: self._parse_freespace(), 1147 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1148 "HEAP": lambda self: self.expression(exp.HeapProperty), 1149 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1150 "IMMUTABLE": lambda self: self.expression( 1151 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1152 ), 1153 "INHERITS": lambda self: self.expression( 1154 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1155 ), 1156 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1157 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1158 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1159 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1160 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1161 "LIKE": lambda self: self._parse_create_like(), 1162 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1163 "LOCK": lambda self: self._parse_locking(), 1164 "LOCKING": lambda self: self._parse_locking(), 1165 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1166 "MATERIALIZED": 
lambda self: self.expression(exp.MaterializedProperty), 1167 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1168 "MODIFIES": lambda self: self._parse_modifies_property(), 1169 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1170 "NO": lambda self: self._parse_no_property(), 1171 "ON": lambda self: self._parse_on_property(), 1172 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1173 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1174 "PARTITION": lambda self: self._parse_partitioned_of(), 1175 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1176 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1177 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1178 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1179 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1180 "READS": lambda self: self._parse_reads_property(), 1181 "REMOTE": lambda self: self._parse_remote_with_connection(), 1182 "RETURNS": lambda self: self._parse_returns(), 1183 "STRICT": lambda self: self.expression(exp.StrictProperty), 1184 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1185 "ROW": lambda self: self._parse_row(), 1186 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1187 "SAMPLE": lambda self: self.expression( 1188 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1189 ), 1190 "SECURE": lambda self: self.expression(exp.SecureProperty), 1191 "SECURITY": lambda self: self._parse_security(), 1192 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1193 "SETTINGS": lambda self: self._parse_settings_property(), 1194 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1195 "SORTKEY": lambda self: self._parse_sortkey(), 1196 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1197 
"STABLE": lambda self: self.expression( 1198 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1199 ), 1200 "STORED": lambda self: self._parse_stored(), 1201 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1202 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1203 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1204 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1205 "TO": lambda self: self._parse_to_table(), 1206 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1207 "TRANSFORM": lambda self: self.expression( 1208 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1209 ), 1210 "TTL": lambda self: self._parse_ttl(), 1211 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1212 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1213 "VOLATILE": lambda self: self._parse_volatile_property(), 1214 "WITH": lambda self: self._parse_with_property(), 1215 } 1216 1217 CONSTRAINT_PARSERS = { 1218 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1219 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1220 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1221 "CHARACTER SET": lambda self: self.expression( 1222 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1223 ), 1224 "CHECK": lambda self: self._parse_check_constraint(), 1225 "COLLATE": lambda self: self.expression( 1226 exp.CollateColumnConstraint, 1227 this=self._parse_identifier() or self._parse_column(), 1228 ), 1229 "COMMENT": lambda self: self.expression( 1230 exp.CommentColumnConstraint, this=self._parse_string() 1231 ), 1232 "COMPRESS": lambda self: self._parse_compress(), 1233 "CLUSTERED": lambda self: self.expression( 1234 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1235 ), 1236 "NONCLUSTERED": lambda self: self.expression( 
1237 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1238 ), 1239 "DEFAULT": lambda self: self.expression( 1240 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1241 ), 1242 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1243 "EPHEMERAL": lambda self: self.expression( 1244 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1245 ), 1246 "EXCLUDE": lambda self: self.expression( 1247 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1248 ), 1249 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1250 "FORMAT": lambda self: self.expression( 1251 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1252 ), 1253 "GENERATED": lambda self: self._parse_generated_as_identity(), 1254 "IDENTITY": lambda self: self._parse_auto_increment(), 1255 "INLINE": lambda self: self._parse_inline(), 1256 "LIKE": lambda self: self._parse_create_like(), 1257 "NOT": lambda self: self._parse_not_constraint(), 1258 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1259 "ON": lambda self: ( 1260 self._match(TokenType.UPDATE) 1261 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1262 ) 1263 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1264 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1265 "PERIOD": lambda self: self._parse_period_for_system_time(), 1266 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1267 "REFERENCES": lambda self: self._parse_references(match=False), 1268 "TITLE": lambda self: self.expression( 1269 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1270 ), 1271 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1272 "UNIQUE": lambda self: self._parse_unique(), 1273 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1274 "WITH": lambda self: 
self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        """Parse a BUCKET(...) / TRUNCATE(...) partition transform.

        The transform keyword itself was already consumed (it is `self._prev`).
        Returns None — after retreating one token — when the keyword is not
        followed by '(', so the caller can re-parse it as a plain identifier.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        # The keyword that was matched decides which AST node to build
        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        # Wrapped CSV of either literals (e.g. bucket counts) or column references
        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda
self: self._parse_cluster(wrapped=True), 1315 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1316 "DROP": lambda self: self._parse_alter_table_drop(), 1317 "RENAME": lambda self: self._parse_alter_table_rename(), 1318 "SET": lambda self: self._parse_alter_table_set(), 1319 "SWAP": lambda self: self.expression( 1320 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1321 ), 1322 } 1323 1324 ALTER_ALTER_PARSERS = { 1325 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1326 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1327 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1328 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1329 } 1330 1331 SCHEMA_UNNAMED_CONSTRAINTS = { 1332 "CHECK", 1333 "EXCLUDE", 1334 "FOREIGN KEY", 1335 "LIKE", 1336 "PERIOD", 1337 "PRIMARY KEY", 1338 "UNIQUE", 1339 "BUCKET", 1340 "TRUNCATE", 1341 } 1342 1343 NO_PAREN_FUNCTION_PARSERS = { 1344 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1345 "CASE": lambda self: self._parse_case(), 1346 "CONNECT_BY_ROOT": lambda self: self.expression( 1347 exp.ConnectByRoot, this=self._parse_column() 1348 ), 1349 "IF": lambda self: self._parse_if(), 1350 } 1351 1352 INVALID_FUNC_NAME_TOKENS = { 1353 TokenType.IDENTIFIER, 1354 TokenType.STRING, 1355 } 1356 1357 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1358 1359 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1360 1361 FUNCTION_PARSERS = { 1362 **{ 1363 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1364 }, 1365 **{ 1366 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1367 }, 1368 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1369 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1370 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1371 "CHAR": lambda self: self._parse_char(), 1372 "CHR": lambda 
self: self._parse_char(), 1373 "DECODE": lambda self: self._parse_decode(), 1374 "EXTRACT": lambda self: self._parse_extract(), 1375 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1376 "GAP_FILL": lambda self: self._parse_gap_fill(), 1377 "INITCAP": lambda self: self._parse_initcap(), 1378 "JSON_OBJECT": lambda self: self._parse_json_object(), 1379 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1380 "JSON_TABLE": lambda self: self._parse_json_table(), 1381 "MATCH": lambda self: self._parse_match_against(), 1382 "NORMALIZE": lambda self: self._parse_normalize(), 1383 "OPENJSON": lambda self: self._parse_open_json(), 1384 "OVERLAY": lambda self: self._parse_overlay(), 1385 "POSITION": lambda self: self._parse_position(), 1386 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1387 "STRING_AGG": lambda self: self._parse_string_agg(), 1388 "SUBSTRING": lambda self: self._parse_substring(), 1389 "TRIM": lambda self: self._parse_trim(), 1390 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1391 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1392 "XMLELEMENT": lambda self: self._parse_xml_element(), 1393 "XMLTABLE": lambda self: self._parse_xml_table(), 1394 } 1395 1396 QUERY_MODIFIER_PARSERS = { 1397 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1398 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1399 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1400 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1401 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1402 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1403 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1404 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1405 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1406 TokenType.FETCH: lambda self: ("limit", 
self._parse_limit()), 1407 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1408 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1409 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1410 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1411 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1412 TokenType.CLUSTER_BY: lambda self: ( 1413 "cluster", 1414 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1415 ), 1416 TokenType.DISTRIBUTE_BY: lambda self: ( 1417 "distribute", 1418 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1419 ), 1420 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1421 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1422 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1423 } 1424 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1425 1426 SET_PARSERS = { 1427 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1428 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1429 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1430 "TRANSACTION": lambda self: self._parse_set_transaction(), 1431 } 1432 1433 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1434 1435 TYPE_LITERAL_PARSERS = { 1436 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1437 } 1438 1439 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1440 1441 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1442 1443 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1444 1445 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1446 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1447 "ISOLATION": ( 1448 ("LEVEL", "REPEATABLE", "READ"), 1449 ("LEVEL", "READ", "COMMITTED"), 1450 
("LEVEL", "READ", "UNCOMITTED"), 1451 ("LEVEL", "SERIALIZABLE"), 1452 ), 1453 "READ": ("WRITE", "ONLY"), 1454 } 1455 1456 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1457 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1458 ) 1459 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1460 1461 TRIGGER_TIMING: OPTIONS_TYPE = { 1462 "INSTEAD": (("OF",),), 1463 "BEFORE": tuple(), 1464 "AFTER": tuple(), 1465 } 1466 1467 TRIGGER_DEFERRABLE: OPTIONS_TYPE = { 1468 "NOT": (("DEFERRABLE",),), 1469 "DEFERRABLE": tuple(), 1470 } 1471 1472 CREATE_SEQUENCE: OPTIONS_TYPE = { 1473 "SCALE": ("EXTEND", "NOEXTEND"), 1474 "SHARD": ("EXTEND", "NOEXTEND"), 1475 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1476 **dict.fromkeys( 1477 ( 1478 "SESSION", 1479 "GLOBAL", 1480 "KEEP", 1481 "NOKEEP", 1482 "ORDER", 1483 "NOORDER", 1484 "NOCACHE", 1485 "CYCLE", 1486 "NOCYCLE", 1487 "NOMINVALUE", 1488 "NOMAXVALUE", 1489 "NOSCALE", 1490 "NOSHARD", 1491 ), 1492 tuple(), 1493 ), 1494 } 1495 1496 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1497 1498 USABLES: OPTIONS_TYPE = dict.fromkeys( 1499 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1500 ) 1501 1502 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1503 1504 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1505 "TYPE": ("EVOLUTION",), 1506 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1507 } 1508 1509 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1510 1511 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1512 1513 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1514 "NOT": ("ENFORCED",), 1515 "MATCH": ( 1516 "FULL", 1517 "PARTIAL", 1518 "SIMPLE", 1519 ), 1520 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1521 "USING": ( 1522 "BTREE", 1523 "HASH", 1524 ), 1525 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1526 } 1527 1528 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1529 "NO": ("OTHERS",), 1530 "CURRENT": 
("ROW",), 1531 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1532 } 1533 1534 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1535 1536 CLONE_KEYWORDS = {"CLONE", "COPY"} 1537 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1538 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1539 1540 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1541 1542 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1543 1544 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1545 1546 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1547 1548 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1549 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1550 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1551 1552 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1553 1554 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1555 1556 ADD_CONSTRAINT_TOKENS = { 1557 TokenType.CONSTRAINT, 1558 TokenType.FOREIGN_KEY, 1559 TokenType.INDEX, 1560 TokenType.KEY, 1561 TokenType.PRIMARY_KEY, 1562 TokenType.UNIQUE, 1563 } 1564 1565 DISTINCT_TOKENS = {TokenType.DISTINCT} 1566 1567 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1568 1569 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1570 1571 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1572 1573 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1574 1575 ODBC_DATETIME_LITERALS: t.Dict[str, t.Type[exp.Expression]] = {} 1576 1577 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1578 1579 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1580 1581 # The style options for the DESCRIBE statement 1582 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1583 1584 SET_ASSIGNMENT_DELIMITERS = {"=", ":=", 
"TO"} 1585 1586 # The style options for the ANALYZE statement 1587 ANALYZE_STYLES = { 1588 "BUFFER_USAGE_LIMIT", 1589 "FULL", 1590 "LOCAL", 1591 "NO_WRITE_TO_BINLOG", 1592 "SAMPLE", 1593 "SKIP_LOCKED", 1594 "VERBOSE", 1595 } 1596 1597 ANALYZE_EXPRESSION_PARSERS = { 1598 "ALL": lambda self: self._parse_analyze_columns(), 1599 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1600 "DELETE": lambda self: self._parse_analyze_delete(), 1601 "DROP": lambda self: self._parse_analyze_histogram(), 1602 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1603 "LIST": lambda self: self._parse_analyze_list(), 1604 "PREDICATE": lambda self: self._parse_analyze_columns(), 1605 "UPDATE": lambda self: self._parse_analyze_histogram(), 1606 "VALIDATE": lambda self: self._parse_analyze_validate(), 1607 } 1608 1609 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1610 1611 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1612 1613 OPERATION_MODIFIERS: t.Set[str] = set() 1614 1615 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1616 1617 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows, exp.Values) 1618 1619 STRICT_CAST = True 1620 1621 PREFIXED_PIVOT_COLUMNS = False 1622 IDENTIFY_PIVOT_STRINGS = False 1623 1624 LOG_DEFAULTS_TO_LN = False 1625 1626 # Whether the table sample clause expects CSV syntax 1627 TABLESAMPLE_CSV = False 1628 1629 # The default method used for table sampling 1630 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1631 1632 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1633 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1634 1635 # Whether the TRIM function expects the characters to trim as its first argument 1636 TRIM_PATTERN_FIRST = False 1637 1638 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1639 STRING_ALIASES = False 1640 1641 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1642 MODIFIERS_ATTACHED_TO_SET_OP = True 1643 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1644 1645 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1646 NO_PAREN_IF_COMMANDS = True 1647 1648 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1649 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1650 1651 # Whether the `:` operator is used to extract a value from a VARIANT column 1652 COLON_IS_VARIANT_EXTRACT = False 1653 1654 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1655 # If this is True and '(' is not found, the keyword will be treated as an identifier 1656 VALUES_FOLLOWED_BY_PAREN = True 1657 1658 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1659 SUPPORTS_IMPLICIT_UNNEST = False 1660 1661 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1662 INTERVAL_SPANS = True 1663 1664 # Whether a PARTITION clause can follow a table reference 1665 SUPPORTS_PARTITION_SELECTION = False 1666 1667 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1668 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1669 1670 # Whether the 'AS' keyword is optional in the CTE definition syntax 1671 OPTIONAL_ALIAS_TOKEN_CTE = True 1672 1673 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1674 ALTER_RENAME_REQUIRES_COLUMN = True 1675 1676 # Whether Alter statements are allowed to contain Partition specifications 1677 ALTER_TABLE_PARTITIONS = False 1678 1679 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1680 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1681 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1682 # as BigQuery, where all joins have the same precedence. 1683 JOINS_HAVE_EQUAL_PRECEDENCE = False 1684 1685 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1686 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1687 1688 # Whether map literals support arbitrary expressions as keys. 1689 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1690 # When False, keys are typically restricted to identifiers. 
1691 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1692 1693 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1694 # is true for Snowflake but not for BigQuery which can also process strings 1695 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1696 1697 # Dialects like Databricks support JOINS without join criteria 1698 # Adding an ON TRUE, makes transpilation semantically correct for other dialects 1699 ADD_JOIN_ON_TRUE = False 1700 1701 # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]' 1702 # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND` 1703 SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT = False 1704 1705 # Autofilled 1706 SHOW_TRIE: t.Dict = {} 1707 SET_TRIE: t.Dict = {} 1708 1709 def __init__( 1710 self, 1711 error_level: t.Optional[ErrorLevel] = None, 1712 error_message_context: int = 100, 1713 max_errors: int = 3, 1714 dialect: DialectType = None, 1715 ): 1716 from sqlglot.dialects import Dialect 1717 1718 self._core = ParserCore( 1719 error_level=error_level or ErrorLevel.IMMEDIATE, 1720 error_message_context=error_message_context, 1721 max_errors=max_errors, 1722 dialect=Dialect.get_or_raise(dialect), 1723 ) 1724 self._match = self._core._match 1725 self._match_set = self._core._match_set 1726 self._match_pair = self._core._match_pair 1727 self._match_texts = self._core._match_texts 1728 self._match_text_seq = self._core._match_text_seq 1729 self._advance = self._core._advance 1730 self._advance_chunk = self._core._advance_chunk 1731 self._retreat = self._core._retreat 1732 self._add_comments = self._core._add_comments 1733 self._is_connected = self._core._is_connected 1734 self._find_sql = self._core._find_sql 1735 self.raise_error = self._core.raise_error 1736 self.validate_expression = self._core.validate_expression 1737 self._try_parse = self._core._try_parse 1738 1739 def reset(self) -> None: 1740 self._core.reset() 1741 1742 @property 1743 def _curr(self) -> t.Any: 1744 return self._core._curr 1745 1746 
@property 1747 def _next(self) -> t.Any: 1748 return self._core._next 1749 1750 @property 1751 def _prev(self) -> t.Any: 1752 return self._core._prev 1753 1754 @property 1755 def _prev_comments(self) -> t.Any: 1756 return self._core._prev_comments 1757 1758 @property 1759 def _tokens(self) -> t.List[t.Any]: 1760 return self._core._tokens 1761 1762 @property 1763 def _index(self) -> int: 1764 return self._core._index 1765 1766 @property 1767 def _chunk_index(self) -> int: 1768 return self._core._chunk_index 1769 1770 @property 1771 def errors(self) -> t.List[t.Any]: 1772 return self._core.errors 1773 1774 @property 1775 def error_level(self) -> t.Any: 1776 return self._core.error_level 1777 1778 @property 1779 def error_message_context(self) -> int: 1780 return self._core.error_message_context 1781 1782 @property 1783 def max_errors(self) -> int: 1784 return self._core.max_errors 1785 1786 @property 1787 def dialect(self) -> t.Any: 1788 return self._core.dialect 1789 1790 @property 1791 def sql(self) -> str: 1792 return self._core.sql 1793 1794 @sql.setter 1795 def sql(self, value: str) -> None: 1796 self._core.sql = value 1797 1798 @property 1799 def _chunks(self) -> t.List[t.List[t.Any]]: 1800 return self._core._chunks 1801 1802 @_chunks.setter 1803 def _chunks(self, value: t.List[t.List[t.Any]]) -> None: 1804 self._core._chunks = value 1805 1806 @property 1807 def _pipe_cte_counter(self) -> int: 1808 return self._core._pipe_cte_counter 1809 1810 @_pipe_cte_counter.setter 1811 def _pipe_cte_counter(self, value: int) -> None: 1812 self._core._pipe_cte_counter = value 1813 1814 def parse(self, raw_tokens: t.List[Token], sql: str) -> t.List[t.Optional[exp.Expression]]: 1815 """ 1816 Parses a list of tokens and returns a list of syntax trees, one tree 1817 per parsed SQL statement. 1818 1819 Args: 1820 raw_tokens: The list of tokens. 1821 sql: The original SQL string. 1822 1823 Returns: 1824 The list of the produced syntax trees. 
1825 """ 1826 return self._parse( 1827 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1828 ) 1829 1830 def parse_into( 1831 self, 1832 expression_types: exp.IntoType, 1833 raw_tokens: t.List[Token], 1834 sql: t.Optional[str] = None, 1835 ) -> t.List[t.Optional[exp.Expression]]: 1836 """ 1837 Parses a list of tokens into a given Expression type. If a collection of Expression 1838 types is given instead, this method will try to parse the token list into each one 1839 of them, stopping at the first for which the parsing succeeds. 1840 1841 Args: 1842 expression_types: The expression type(s) to try and parse the token list into. 1843 raw_tokens: The list of tokens. 1844 sql: The original SQL string, used to produce helpful debug messages. 1845 1846 Returns: 1847 The target Expression. 1848 """ 1849 errors = [] 1850 for expression_type in ensure_list(expression_types): 1851 parser = self.EXPRESSION_PARSERS.get(expression_type) 1852 if not parser: 1853 raise TypeError(f"No parser registered for {expression_type}") 1854 1855 try: 1856 return self._parse(parser, raw_tokens, sql) 1857 except ParseError as e: 1858 e.errors[0]["into_expression"] = expression_type 1859 errors.append(e) 1860 1861 raise ParseError( 1862 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1863 errors=merge_errors(errors), 1864 ) from errors[-1] 1865 1866 def check_errors(self) -> None: 1867 """Logs or raises any found errors, depending on the chosen error level setting.""" 1868 if self.error_level == ErrorLevel.WARN: 1869 for error in self.errors: 1870 logger.error(str(error)) 1871 elif self.error_level == ErrorLevel.RAISE and self.errors: 1872 raise ParseError( 1873 concat_messages(self.errors, self.max_errors), 1874 errors=merge_errors(self.errors), 1875 ) 1876 1877 def expression( 1878 self, 1879 exp_class: t.Type[E], 1880 token: t.Optional[Token] = None, 1881 comments: t.Optional[t.List[str]] = None, 1882 **kwargs, 1883 ) -> E: 1884 if token: 1885 
instance = exp_class(this=token.text, **kwargs)
            instance.update_positions(token)
        else:
            instance = exp_class(**kwargs)
        # Attach explicit comments when given, otherwise pull pending comments
        # from the token stream via the core helper
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _parse_batch_statements(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        sep_first_statement: bool = True,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Parse one statement per token chunk and return the collected trees.

        Args:
            parse_method: The parsing callback invoked once per chunk.
            sep_first_statement: Whether the first statement must be parsed from
                the current chunk before advancing (see comment below).
        """
        expressions: t.List[t.Optional[exp.Expression]] = []

        # Chunkification binds if/while statements with the first statement of the body
        if sep_first_statement:
            self._match(TokenType.BEGIN)
            expressions.append(parse_method(self))

        chunks_length = len(self._chunks)
        while self._chunk_index < chunks_length:
            self._advance_chunk()

            # Stop before an ELSE chunk so the enclosing statement parser can
            # consume it (NOTE(review): inferred from control flow — confirm)
            if self._match(TokenType.ELSE, advance=False):
                return expressions

            # A lone END token closes the batch with an explicit marker node
            if not self._next and self._match(TokenType.END):
                expressions.append(exp.EndStatement())
                continue

            expressions.append(parse_method(self))

        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

        return expressions

    def _parse(
        self,
        parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """Reset state, split raw_tokens into semicolon-delimited chunks, parse them all."""
        self.reset()
        self.sql = sql or ""

        total = len(raw_tokens)
        chunks: t.List[t.List[Token]] = [[]]

        for i, token in enumerate(raw_tokens):
            if token.token_type == TokenType.SEMICOLON:
                # A semicolon carrying comments becomes its own chunk so the
                # comments survive into the parsed output
                if token.comments:
                    chunks.append([token])

                # Don't open a trailing empty chunk for a final semicolon
                if i < total - 1:
                    chunks.append([])
            else:
                chunks[-1].append(token)

        self._chunks = chunks

        return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False)

    def _warn_unsupported(self) -> None:
        if len(self._tokens)
<= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        """Wrap the rest of the statement as an opaque Command node, warning first."""
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        """Parse `COMMENT [IF EXISTS] ON <kind> <object> IS <string>`.

        Falls back to a generic Command when the object kind is not a known
        creatable.
        """
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        # The target is parsed differently depending on the object kind
        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        """Parse the table reference following TO into a ToTableProperty."""
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def
_parse_ttl_action() -> t.Optional[exp.Expression]:
            """Parse one TTL expression plus an optional DELETE / RECOMPRESS /
            TO DISK / TO VOLUME action; returns the bare expression otherwise."""
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        # A GROUP BY may be followed by `SET <aggregations>`
        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_condition(self) -> t.Any:
        """Parse an expression, optionally wrapped in parentheses."""
        return self._parse_wrapped(parse_method=self._parse_expression, optional=True)

    def _parse_block(self) -> exp.Block:
        """Parse the remaining statement chunks into a Block node."""
        return self.expression(
            exp.Block,
            expressions=self._parse_batch_statements(
                parse_method=lambda self: self._parse_statement()
            ),
        )

    def _parse_whileblock(self) -> exp.WhileBlock:
        """Parse a WHILE loop: its condition followed by a statement block."""
        return self.expression(
            exp.WhileBlock,
            this=self._parse_condition(),
            body=self._parse_block(),
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        """Parse a single statement, or return None at end of input."""
        if self._curr is None:
            return None

        # Keyword-dispatched statements (SELECT, CREATE, ...) take priority;
        # any pending comments are re-attached to the parsed node
        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if
    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        """Parse a DROP statement.

        Args:
            exists: whether IF EXISTS was already consumed by the caller.

        Falls back to an exp.Command when the dropped object kind is not a
        known creatable.
        """
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        # ON <cluster> clause (ClickHouse-style)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # NOTE: the CASCADE/CONSTRAINTS/PURGE matches below run in argument
        # order and each consumes tokens, so their sequence is significant.
        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement into exp.Create.

        Falls back to exp.Command (via _parse_as_command) whenever the input
        cannot be fully understood: unknown creatable kind, unparsable trigger
        parts, or trailing unconsumed tokens.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL columnstore index flavors: CLUSTERED / NONCLUSTERED COLUMNSTORE
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

        if not properties or not create_token:
            return self._parse_as_command(start)

        create_token_type = t.cast(Token, create_token).token_type

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Merge newly-parsed properties into the running Properties node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = (
                            self._parse_user_defined_function_expression()
                            if create_token_type == TokenType.FUNCTION
                            else self._parse_block()
                        )

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif (
            create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER)
        ) or create_token_type == TokenType.TRIGGER:
            # CREATE [CONSTRAINT] TRIGGER; bail out to Command on any missing part
            if is_constraint := (create_token_type == TokenType.CONSTRAINT):
                create_token = self._prev

            trigger_name = self._parse_id_var()
            if not trigger_name:
                return self._parse_as_command(start)

            timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False)
            timing = timing_var.this if timing_var else None
            if not timing:
                return self._parse_as_command(start)

            events = self._parse_trigger_events()
            if not self._match(TokenType.ON):
                self.raise_error("Expected ON in trigger definition")

            table = self._parse_table_parts()
            referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None
            deferrable, initially = self._parse_trigger_deferrable()
            referencing = self._parse_trigger_referencing()
            for_each = self._parse_trigger_for_each()
            when = self._match_text_seq("WHEN") and self._parse_wrapped(
                self._parse_disjunction, optional=True
            )
            execute = self._parse_trigger_execute()

            if execute is None:
                return self._parse_as_command(start)

            trigger_props = self.expression(
                exp.TriggerProperties,
                table=table,
                timing=timing,
                events=events,
                execute=execute,
                constraint=is_constraint,
                referenced_table=referenced_table,
                deferrable=deferrable,
                initially=initially,
                referencing=referencing,
                for_each=for_each,
                when=when,
            )

            this = trigger_name
            extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else []))
        elif create_token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Fold all SequenceProperties fragments into a single node,
                    # concatenating their "options" lists.
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fallback to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Anything left over (other than a wrapping paren/comma) means we did
        # not fully understand the statement -- fall back to a raw Command.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )
self._match_text_seq("=") 2384 seq.set("increment", self._parse_term()) 2385 elif self._match_text_seq("MINVALUE"): 2386 seq.set("minvalue", self._parse_term()) 2387 elif self._match_text_seq("MAXVALUE"): 2388 seq.set("maxvalue", self._parse_term()) 2389 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2390 self._match_text_seq("=") 2391 seq.set("start", self._parse_term()) 2392 elif self._match_text_seq("CACHE"): 2393 # T-SQL allows empty CACHE which is initialized dynamically 2394 seq.set("cache", self._parse_number() or True) 2395 elif self._match_text_seq("OWNED", "BY"): 2396 # "OWNED BY NONE" is the default 2397 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2398 else: 2399 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2400 if opt: 2401 options.append(opt) 2402 else: 2403 break 2404 2405 seq.set("options", options if options else None) 2406 return None if self._index == index else seq 2407 2408 def _parse_trigger_events(self) -> t.List[exp.TriggerEvent]: 2409 events = [] 2410 2411 while True: 2412 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2413 2414 if not event_type: 2415 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2416 2417 columns = ( 2418 self._parse_csv(self._parse_column) 2419 if event_type == "UPDATE" and self._match_text_seq("OF") 2420 else None 2421 ) 2422 2423 events.append(self.expression(exp.TriggerEvent, this=event_type, columns=columns)) 2424 2425 if not self._match(TokenType.OR): 2426 break 2427 2428 return events 2429 2430 def _parse_trigger_deferrable( 2431 self, 2432 ) -> t.Tuple[t.Optional[str], t.Optional[str]]: 2433 deferrable_var = self._parse_var_from_options( 2434 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2435 ) 2436 deferrable = deferrable_var.this if deferrable_var else None 2437 2438 initially = None 2439 if deferrable and self._match_text_seq("INITIALLY"): 2440 
initially = ( 2441 self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2442 ) 2443 2444 return deferrable, initially 2445 2446 def _parse_trigger_referencing_clause(self, keyword: str) -> t.Optional[exp.Expression]: 2447 if not self._match_text_seq(keyword): 2448 return None 2449 if not self._match_text_seq("TABLE"): 2450 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2451 self._match_text_seq("AS") 2452 return self._parse_id_var() 2453 2454 def _parse_trigger_referencing(self) -> t.Optional[exp.TriggerReferencing]: 2455 if not self._match_text_seq("REFERENCING"): 2456 return None 2457 2458 old_alias = None 2459 new_alias = None 2460 2461 while True: 2462 if alias := self._parse_trigger_referencing_clause("OLD"): 2463 if old_alias is not None: 2464 self.raise_error("Duplicate OLD clause in REFERENCING") 2465 old_alias = alias 2466 elif alias := self._parse_trigger_referencing_clause("NEW"): 2467 if new_alias is not None: 2468 self.raise_error("Duplicate NEW clause in REFERENCING") 2469 new_alias = alias 2470 else: 2471 break 2472 2473 if old_alias is None and new_alias is None: 2474 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2475 2476 return self.expression( 2477 exp.TriggerReferencing, 2478 old=old_alias, 2479 new=new_alias, 2480 ) 2481 2482 def _parse_trigger_for_each(self) -> t.Optional[str]: 2483 if not self._match_text_seq("FOR", "EACH"): 2484 return None 2485 2486 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2487 2488 def _parse_trigger_execute(self) -> t.Optional[exp.TriggerExecute]: 2489 if not self._match(TokenType.EXECUTE): 2490 return None 2491 2492 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2493 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2494 2495 func_call = self._parse_function(anonymous=True, optional_parens=False) 2496 return self.expression(exp.TriggerExecute, 
    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        """Parse a pre-name property with its leading modifier keywords.

        Returns None when no property parser matches after the modifiers.
        """
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # NOTE: the dict literal evaluates in order and every _match* call
        # consumes tokens, so the modifier keywords are tested in this exact
        # sequence -- do not reorder.
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched; a parser
                # that rejects them raises TypeError, reported as a parse error.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None
self._retreat(index) 2553 return None 2554 2555 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2556 if isinstance(key, exp.Column): 2557 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2558 2559 value = self._parse_bitwise() or self._parse_var(any_token=True) 2560 2561 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2562 if isinstance(value, exp.Column): 2563 value = exp.var(value.name) 2564 2565 return self.expression(exp.Property, this=key, value=value) 2566 2567 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2568 if self._match_text_seq("BY"): 2569 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2570 2571 self._match(TokenType.ALIAS) 2572 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2573 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2574 2575 return self.expression( 2576 exp.FileFormatProperty, 2577 this=( 2578 self.expression( 2579 exp.InputOutputFormat, 2580 input_format=input_format, 2581 output_format=output_format, 2582 ) 2583 if input_format or output_format 2584 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2585 ), 2586 hive_format=True, 2587 ) 2588 2589 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2590 field = self._parse_field() 2591 if isinstance(field, exp.Identifier) and not field.quoted: 2592 field = exp.var(field) 2593 2594 return field 2595 2596 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2597 self._match(TokenType.EQ) 2598 self._match(TokenType.ALIAS) 2599 2600 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2601 2602 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2603 properties = [] 2604 while True: 2605 if before: 2606 
prop = self._parse_property_before() 2607 else: 2608 prop = self._parse_property() 2609 if not prop: 2610 break 2611 for p in ensure_list(prop): 2612 properties.append(p) 2613 2614 if properties: 2615 return self.expression(exp.Properties, expressions=properties) 2616 2617 return None 2618 2619 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2620 return self.expression( 2621 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2622 ) 2623 2624 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2625 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2626 security_specifier = self._prev.text.upper() 2627 return self.expression(exp.SecurityProperty, this=security_specifier) 2628 return None 2629 2630 def _parse_settings_property(self) -> exp.SettingsProperty: 2631 return self.expression( 2632 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2633 ) 2634 2635 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2636 if self._index >= 2: 2637 pre_volatile_token = self._tokens[self._index - 2] 2638 else: 2639 pre_volatile_token = None 2640 2641 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2642 return exp.VolatileProperty() 2643 2644 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2645 2646 def _parse_retention_period(self) -> exp.Var: 2647 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2648 number = self._parse_number() 2649 number_str = f"{number} " if number else "" 2650 unit = self._parse_var(any_token=True) 2651 return exp.var(f"{number_str}{unit}") 2652 2653 def _parse_system_versioning_property( 2654 self, with_: bool = False 2655 ) -> exp.WithSystemVersioningProperty: 2656 self._match(TokenType.EQ) 2657 prop = self.expression( 2658 exp.WithSystemVersioningProperty, 2659 on=True, 2660 with_=with_, 2661 ) 2662 2663 if 
self._match_text_seq("OFF"): 2664 prop.set("on", False) 2665 return prop 2666 2667 self._match(TokenType.ON) 2668 if self._match(TokenType.L_PAREN): 2669 while self._curr and not self._match(TokenType.R_PAREN): 2670 if self._match_text_seq("HISTORY_TABLE", "="): 2671 prop.set("this", self._parse_table_parts()) 2672 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2673 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2674 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2675 prop.set("retention_period", self._parse_retention_period()) 2676 2677 self._match(TokenType.COMMA) 2678 2679 return prop 2680 2681 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2682 self._match(TokenType.EQ) 2683 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2684 prop = self.expression(exp.DataDeletionProperty, on=on) 2685 2686 if self._match(TokenType.L_PAREN): 2687 while self._curr and not self._match(TokenType.R_PAREN): 2688 if self._match_text_seq("FILTER_COLUMN", "="): 2689 prop.set("filter_column", self._parse_column()) 2690 elif self._match_text_seq("RETENTION_PERIOD", "="): 2691 prop.set("retention_period", self._parse_retention_period()) 2692 2693 self._match(TokenType.COMMA) 2694 2695 return prop 2696 2697 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2698 kind = "HASH" 2699 expressions: t.Optional[t.List[exp.Expression]] = None 2700 if self._match_text_seq("BY", "HASH"): 2701 expressions = self._parse_wrapped_csv(self._parse_id_var) 2702 elif self._match_text_seq("BY", "RANDOM"): 2703 kind = "RANDOM" 2704 2705 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2706 buckets: t.Optional[exp.Expression] = None 2707 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2708 buckets = self._parse_number() 2709 2710 return self.expression( 2711 exp.DistributedByProperty, 2712 expressions=expressions, 2713 kind=kind, 2714 buckets=buckets, 
    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in a property list.

        The alternatives below each consume tokens, so they must be tried in
        this exact order.
        """
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            # WITH (prop [, ...]) -- a parenthesized property list
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()
2771 2772 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2773 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2774 self._match(TokenType.EQ) 2775 2776 user = self._parse_id_var() 2777 self._match(TokenType.PARAMETER) 2778 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2779 2780 if not user or not host: 2781 return None 2782 2783 return exp.DefinerProperty(this=f"{user}@{host}") 2784 2785 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2786 self._match(TokenType.TABLE) 2787 self._match(TokenType.EQ) 2788 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2789 2790 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2791 return self.expression(exp.LogProperty, no=no) 2792 2793 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2794 return self.expression(exp.JournalProperty, **kwargs) 2795 2796 def _parse_checksum(self) -> exp.ChecksumProperty: 2797 self._match(TokenType.EQ) 2798 2799 on = None 2800 if self._match(TokenType.ON): 2801 on = True 2802 elif self._match_text_seq("OFF"): 2803 on = False 2804 2805 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2806 2807 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2808 return self.expression( 2809 exp.Cluster, 2810 expressions=( 2811 self._parse_wrapped_csv(self._parse_ordered) 2812 if wrapped 2813 else self._parse_csv(self._parse_ordered) 2814 ), 2815 ) 2816 2817 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2818 self._match_text_seq("BY") 2819 2820 self._match_l_paren() 2821 expressions = self._parse_csv(self._parse_column) 2822 self._match_r_paren() 2823 2824 if self._match_text_seq("SORTED", "BY"): 2825 self._match_l_paren() 2826 sorted_by = self._parse_csv(self._parse_ordered) 2827 self._match_r_paren() 2828 else: 2829 sorted_by = None 2830 2831 self._match(TokenType.INTO) 2832 buckets = self._parse_number() 2833 
self._match_text_seq("BUCKETS") 2834 2835 return self.expression( 2836 exp.ClusteredByProperty, 2837 expressions=expressions, 2838 sorted_by=sorted_by, 2839 buckets=buckets, 2840 ) 2841 2842 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2843 if not self._match_text_seq("GRANTS"): 2844 self._retreat(self._index - 1) 2845 return None 2846 2847 return self.expression(exp.CopyGrantsProperty) 2848 2849 def _parse_freespace(self) -> exp.FreespaceProperty: 2850 self._match(TokenType.EQ) 2851 return self.expression( 2852 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2853 ) 2854 2855 def _parse_mergeblockratio( 2856 self, no: bool = False, default: bool = False 2857 ) -> exp.MergeBlockRatioProperty: 2858 if self._match(TokenType.EQ): 2859 return self.expression( 2860 exp.MergeBlockRatioProperty, 2861 this=self._parse_number(), 2862 percent=self._match(TokenType.PERCENT), 2863 ) 2864 2865 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2866 2867 def _parse_datablocksize( 2868 self, 2869 default: t.Optional[bool] = None, 2870 minimum: t.Optional[bool] = None, 2871 maximum: t.Optional[bool] = None, 2872 ) -> exp.DataBlocksizeProperty: 2873 self._match(TokenType.EQ) 2874 size = self._parse_number() 2875 2876 units = None 2877 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2878 units = self._prev.text 2879 2880 return self.expression( 2881 exp.DataBlocksizeProperty, 2882 size=size, 2883 units=units, 2884 default=default, 2885 minimum=minimum, 2886 maximum=maximum, 2887 ) 2888 2889 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2890 self._match(TokenType.EQ) 2891 always = self._match_text_seq("ALWAYS") 2892 manual = self._match_text_seq("MANUAL") 2893 never = self._match_text_seq("NEVER") 2894 default = self._match_text_seq("DEFAULT") 2895 2896 autotemp = None 2897 if self._match_text_seq("AUTOTEMP"): 2898 autotemp = self._parse_schema() 2899 2900 return 
    def _parse_locking(self) -> exp.LockingProperty:
        """Parse a LOCKING clause: LOCKING [TABLE|VIEW|ROW|DATABASE] [<name>]
        [FOR|IN] <lock type> [OVERRIDE]. Each piece is optional."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks don't name an object; the others lock a specific
        # database/table/view.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )
    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a partition bound spec: IN (...), FROM (...) TO (...), or
        WITH (MODULUS n, REMAINDER r). Raises a parse error if none match."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            # MINVALUE/MAXVALUE are keywords here, not column references
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            # LIST partitioning: FOR VALUES IN (...)
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            # RANGE partitioning: FOR VALUES FROM (...) TO (...)
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            # HASH partitioning: FOR VALUES WITH (MODULUS n, REMAINDER r)
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )
or FOR VALUES clause.") 3027 3028 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 3029 3030 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 3031 self._match(TokenType.EQ) 3032 return self.expression( 3033 exp.PartitionedByProperty, 3034 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 3035 ) 3036 3037 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 3038 if self._match_text_seq("AND", "STATISTICS"): 3039 statistics = True 3040 elif self._match_text_seq("AND", "NO", "STATISTICS"): 3041 statistics = False 3042 else: 3043 statistics = None 3044 3045 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 3046 3047 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3048 if self._match_text_seq("SQL"): 3049 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 3050 return None 3051 3052 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3053 if self._match_text_seq("SQL", "DATA"): 3054 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 3055 return None 3056 3057 def _parse_no_property(self) -> t.Optional[exp.Expression]: 3058 if self._match_text_seq("PRIMARY", "INDEX"): 3059 return exp.NoPrimaryIndexProperty() 3060 if self._match_text_seq("SQL"): 3061 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 3062 return None 3063 3064 def _parse_on_property(self) -> t.Optional[exp.Expression]: 3065 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 3066 return exp.OnCommitProperty() 3067 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 3068 return exp.OnCommitProperty(delete=True) 3069 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 3070 3071 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 3072 if self._match_text_seq("SQL", "DATA"): 3073 return self.expression(exp.SqlReadWriteProperty, 
this="READS SQL DATA") 3074 return None 3075 3076 def _parse_distkey(self) -> exp.DistKeyProperty: 3077 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 3078 3079 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 3080 table = self._parse_table(schema=True) 3081 3082 options = [] 3083 while self._match_texts(("INCLUDING", "EXCLUDING")): 3084 this = self._prev.text.upper() 3085 3086 id_var = self._parse_id_var() 3087 if not id_var: 3088 return None 3089 3090 options.append( 3091 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 3092 ) 3093 3094 return self.expression(exp.LikeProperty, this=table, expressions=options) 3095 3096 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 3097 return self.expression( 3098 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 3099 ) 3100 3101 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 3102 self._match(TokenType.EQ) 3103 return self.expression( 3104 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 3105 ) 3106 3107 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 3108 self._match_text_seq("WITH", "CONNECTION") 3109 return self.expression( 3110 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 3111 ) 3112 3113 def _parse_returns(self) -> exp.ReturnsProperty: 3114 value: t.Optional[exp.Expression] 3115 null = None 3116 is_table = self._match(TokenType.TABLE) 3117 3118 if is_table: 3119 if self._match(TokenType.LT): 3120 value = self.expression( 3121 exp.Schema, 3122 this="TABLE", 3123 expressions=self._parse_csv(self._parse_struct_types), 3124 ) 3125 if not self._match(TokenType.GT): 3126 self.raise_error("Expecting >") 3127 else: 3128 value = self._parse_schema(exp.var("TABLE")) 3129 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 3130 null = True 3131 value = None 3132 else: 3133 
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse a DESCRIBE statement, of either another statement or a table."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot right after the "style" token means it was actually the first part
            # of a qualified table name, so undo both matches and reparse as a table
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
            as_json=self._match_text_seq("AS", "JSON"),
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT { FIRST | ALL } [WHEN ... THEN] INTO ... statement."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # Each branch is an optional WHEN condition THEN, or an ELSE, followed by INTO
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                # No INTO means we've run out of insert branches
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse INSERT in its many dialect variants (DIRECTORY, OR ..., multi-table, etc.)."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # INSERT [LOCAL] DIRECTORY 'path' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                # INSERT FIRST / INSERT ALL -> multi-table insert
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE ...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = self._parse_function() if is_function else self._parse_insert_table()

        returning = self._parse_returning()  # TSQL allows RETURNING before source

        # NOTE: the keyword arguments below are evaluated left-to-right and most of
        # them consume tokens, so their order mirrors the clause order in the SQL text
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_disjunction(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            default=self._match_text_seq("DEFAULT", "VALUES"),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_insert_table(self) -> t.Optional[exp.Expression]:
        this = self._parse_table(schema=True, parse_partition=True)
        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
            this.set("alias", self._parse_table_alias())
        return this

    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... conflict clauses."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: either a named constraint or a column list
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        index_predicate = self._parse_where()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = b, ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            index_predicate=index_predicate,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            # Not a SERDEPROPERTIES clause after all -- rewind past any WITH we consumed
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            expressions=self._parse_wrapped_properties(),
            with_=with_,
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '...' or ROW FORMAT DELIMITED [terminator options]."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE; other LOADs become raw commands."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        # NOTE: keyword arguments below consume tokens left-to-right in clause order
        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING)
            and self._parse_csv(lambda: self._parse_table(joins=True)),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            order=self._parse_order(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE, accepting its clauses in any order until none match."""
        kwargs: t.Dict[str, t.Any] = {
            "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS),
        }
        while self._curr:
            if self._match(TokenType.SET):
                kwargs["expressions"] = self._parse_csv(self._parse_equality)
            elif self._match(TokenType.RETURNING, advance=False):
                kwargs["returning"] = self._parse_returning()
            elif self._match(TokenType.FROM, advance=False):
                from_ = self._parse_from(joins=True)
                table = from_.this if from_ else None
                if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False):
                    # Attach any trailing joins to the subquery in the FROM clause
                    table.set("joins", list(self._parse_joins()) or None)

                kwargs["from_"] = from_
            elif self._match(TokenType.WHERE, advance=False):
                kwargs["where"] = self._parse_where()
            elif self._match(TokenType.ORDER_BY, advance=False):
                kwargs["order"] = self._parse_order()
            elif self._match(TokenType.LIMIT, advance=False):
                kwargs["limit"] = self._parse_limit()
            else:
                break

        return self.expression(exp.Update, **kwargs)

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE t [OPTIONS('k' = 'v')] [AS select]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # A single 'key' = 'value' pair is parsed here
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_disjunction),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        """Parse a single VALUES row, returning it as a Tuple."""

        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[t.List[exp.Expression]]]:
        # Returns (projections, excluded columns); the base parser has no EXCLUDE support
        return self._parse_expressions(), None

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the contents of a parenthesized query: pivot, FROM-first, or nested select."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select(from_=from_)
            if select:
                if not select.args.get("from_"):
                    select.set("from_", from_)
                this = select
            else:
                # Bare FROM with no SELECT: treat as SELECT * FROM ...
                this = exp.select("*").from_(t.cast(exp.From, from_))
                this = self._parse_query_modifiers(self._parse_set_operations(this))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

            this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
        from_: t.Optional[exp.From] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT query, optionally followed by pipe (|>) syntax."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
            if not query and from_:
                # A leading FROM with no SELECT can still start a pipe chain
                query = exp.select("*").from_(from_)
            if isinstance(query, exp.Query):
                query = self._parse_pipe_syntax_query(query)
                query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Core SELECT parser: CTEs, projections, FROM, modifiers, and set operations."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Unwrap redundant parentheses so the WITH attaches to the real statement
            while isinstance(this, exp.Subquery) and this.is_wrapper:
                this = this.this

            if "with_" in this.arg_types:
                this.set("with_", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(joins=True, consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                matched_distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                # Next token is part of a dotted name, so ALL/DISTINCT can't apply here
                all_, matched_distinct = None, False

            kind = (
                self._prev.text.upper()
                if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE"))
                else None
            )

            distinct: t.Optional[exp.Expression] = (
                self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )
                if matched_distinct
                else None
            )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections, exclude = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                exclude=exclude,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from_", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            this = self._parse_wrapped_select(table=table)

            if this:
                this.add_comments(comments, prepend=True)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare leading FROM with no SELECT: implicit SELECT *
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH { BREADTH | DEPTH } FIRST BY ... on a recursive WITH."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTE list."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a repeated WITH between CTEs
                self._match(TokenType.WITH)
            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive or None,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE: alias [USING KEY (...)] AS [NOT MATERIALIZED] (statement)."""
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        key_expressions = (
            self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
        )

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            # No AS and the dialect requires it -- this wasn't a CTE after all
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            key_expressions=key_expressions,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            # Wrap a bare VALUES body in SELECT * FROM ... so downstream handling is uniform
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite implicit unnest joins (bare correlated table refs) into explicit UNNESTs."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names of relations already in scope; a join against one of these parts
        # is really a reference into that relation, i.e. an implicit unnest
        refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    @t.overload
    def _parse_query_modifiers(self, this: E) -> E: ...

    @t.overload
    def _parse_query_modifiers(self, this: None) -> None: ...

    def _parse_query_modifiers(self, this):
        """Attach trailing joins, laterals, and clause modifiers (WHERE, GROUP BY, ...) to a query."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # Normalize a LIMIT-embedded offset into a standalone Offset node
                            offset = expression.args.get("offset")
                            expression.set("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        # Consume everything remaining and keep it as the raw hint text
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse hint contents; on failure or leftovers, fall back to the raw string."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        # Hints arrive as comments attached to the HINT token
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The pattern is a regex-like token soup, so capture it verbatim by
            # scanning tokens until the parentheses balance out
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY constructs."""
        # cross_apply: True = CROSS APPLY, False = OUTER APPLY, None = plain LATERAL
        cross_apply: t.Optional[bool] = None
        if self._match_pair(TokenType.CROSS, TokenType.APPLY):
            cross_apply = True
        elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: accept an unnest, a function call, or a dotted identifier chain
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_stream(self) -> t.Optional[exp.Stream]:
        """Parse a STREAM <table> reference; retreats if no table follows STREAM."""
        index = self._index
        if self._match_text_seq("STREAM"):
            this = self._try_parse(self._parse_table)
            if this:
                return self.expression(exp.Stream, this=this)

        self._retreat(index)
        return None

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each possibly None."""
        return (
            self._prev if self._match_set(self.JOIN_METHODS) else None,
            self._prev if self._match_set(self.JOIN_SIDES) else None,
            self._prev if self._match_set(self.JOIN_KINDS) else None,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING (...) clause, unwrapping bare columns."""
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse a single JOIN clause (including comma joins and APPLY variants)."""
        # A comma acts as an implicit (cross) join between table factors
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        # Without an explicit JOIN keyword the prefix tokens were consumed speculatively;
        # give them back unless the caller told us the join token was already handled.
        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # Nested-join case: the right side may itself be a chain of joins whose
            # ON/USING belongs to this join; if not, retreat and discard the attempt.
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
4228 and kwargs.get("kind") in (None, "INNER", "OUTER") 4229 ): 4230 kwargs["on"] = exp.true() 4231 4232 if directed: 4233 kwargs["directed"] = directed 4234 4235 return self.expression(exp.Join, comments=comments, **kwargs) 4236 4237 def _parse_opclass(self) -> t.Optional[exp.Expression]: 4238 this = self._parse_disjunction() 4239 4240 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4241 return this 4242 4243 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4244 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 4245 4246 return this 4247 4248 def _parse_index_params(self) -> exp.IndexParameters: 4249 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4250 4251 if self._match(TokenType.L_PAREN, advance=False): 4252 columns = self._parse_wrapped_csv(self._parse_with_operator) 4253 else: 4254 columns = None 4255 4256 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4257 partition_by = self._parse_partition_by() 4258 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4259 tablespace = ( 4260 self._parse_var(any_token=True) 4261 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4262 else None 4263 ) 4264 where = self._parse_where() 4265 4266 on = self._parse_field() if self._match(TokenType.ON) else None 4267 4268 return self.expression( 4269 exp.IndexParameters, 4270 using=using, 4271 columns=columns, 4272 include=include, 4273 partition_by=partition_by, 4274 where=where, 4275 with_storage=with_storage, 4276 tablespace=tablespace, 4277 on=on, 4278 ) 4279 4280 def _parse_index( 4281 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 4282 ) -> t.Optional[exp.Index]: 4283 if index or anonymous: 4284 unique = None 4285 primary = None 4286 amp = None 4287 4288 self._match(TokenType.ON) 4289 self._match(TokenType.TABLE) # hive 4290 table = self._parse_table_parts(schema=True) 4291 
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL index hints, if present."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse one dotted component of a table name (function calls disallowed in schemas)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly catalog.db.table-qualified) table name into exp.Table."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            # Shift the parts one slot: the last component names a database, not a table
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table factor: stream, lateral, unnest, values, subquery, or a table name
        with its optional sample, version, alias, hints, pivots, and joins."""
        stream = self._parse_stream()
        if stream:
            return stream

        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from_tables = (
            self._parse_wrapped_csv(self._parse_table)
            if self._match_text_seq("ROWS", "FROM")
            else None
        )
        rows_from = (
            self.expression(exp.Table, rows_from=rows_from_tables) if rows_from_tables else None
        )

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        # Dialect flags decide whether version/sample clauses come before or after the alias
        if self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed", self._parse_table_parts())
        elif self._match_text_seq("NOT", "INDEXED"):
            this.set("indexed", False)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if not self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse a temporal-table version clause (FOR SYSTEM_TIME style snapshots)."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                # Incomplete clause: undo all speculative matches
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse a Snowflake CHANGES (INFORMATION => ...) clause."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(...), including WITH ORDINALITY / WITH OFFSET handling."""
        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
            return None

        self._advance()

        expressions = self._parse_wrapped_csv(self._parse_equality)
        # offset starts as a boolean (WITH ORDINALITY seen?) and may later be
        # replaced by the identifier that aliases the offset column
        offset: t.Union[bool, exp.Expression] = self._match_pair(
            TokenType.WITH, TokenType.ORDINALITY
        )

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                # The extra column alias names the ordinality column
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        """Parse a VALUES (...) table constructor, possibly parenthesized as a derived table."""
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        """Parse a TABLESAMPLE / USING SAMPLE clause with its many dialect variants."""
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            # NOTE(review): double assignment is redundant — equivalent to a single
            # `bucket_denominator = self._parse_number()`
            bucket_denominator = bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Yield joins until _parse_join returns None."""
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        """Parse the INTO NAME ... VALUE ... part of a simplified UNPIVOT."""
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT/UNPIVOT statement form."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_column())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `FOR col IN (...)` part of a PIVOT clause."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match(TokenType.IN):
            self.raise_error("Expecting IN")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.ANY):
                exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
            else:
                exprs = self._parse_csv(_parse_aliased_expression)
            self._match_r_paren()
            return self.expression(exp.In, this=value, expressions=exprs)

        return self.expression(exp.In, this=value, field=self._parse_id_var())

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        """Parse one aggregation in a PIVOT list; trailing commas yield None."""
        func = self._parse_function()
        if not func:
            if self._prev and self._prev.token_type == TokenType.COMMA:
                return None
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT clause and derive the resulting column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
4863 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4864 continue 4865 4866 all_fields.append( 4867 [ 4868 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4869 for fld in pivot_field_expressions 4870 ] 4871 ) 4872 4873 if all_fields: 4874 if names: 4875 all_fields.append(names) 4876 4877 # Generate all possible combinations of the pivot columns 4878 # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4879 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 4880 for fld_parts_tuple in itertools.product(*all_fields): 4881 fld_parts = list(fld_parts_tuple) 4882 4883 if names and self.PREFIXED_PIVOT_COLUMNS: 4884 # Move the "name" to the front of the list 4885 fld_parts.insert(0, fld_parts.pop(-1)) 4886 4887 columns.append(exp.to_identifier("_".join(fld_parts))) 4888 4889 pivot.set("columns", columns) 4890 4891 return pivot 4892 4893 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4894 return [agg.alias for agg in aggregations if agg.alias] 4895 4896 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4897 if not skip_where_token and not self._match(TokenType.PREWHERE): 4898 return None 4899 4900 return self.expression( 4901 exp.PreWhere, comments=self._prev_comments, this=self._parse_disjunction() 4902 ) 4903 4904 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4905 if not skip_where_token and not self._match(TokenType.WHERE): 4906 return None 4907 4908 return self.expression( 4909 exp.Where, comments=self._prev_comments, this=self._parse_disjunction() 4910 ) 4911 4912 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4913 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4914 return None 4915 comments = self._prev_comments 4916 4917 elements: t.Dict[str, t.Any] = defaultdict(list) 4918 4919 if self._match(TokenType.ALL): 4920 
elements["all"] = True 4921 elif self._match(TokenType.DISTINCT): 4922 elements["all"] = False 4923 4924 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4925 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4926 4927 while True: 4928 index = self._index 4929 4930 elements["expressions"].extend( 4931 self._parse_csv( 4932 lambda: None 4933 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4934 else self._parse_disjunction() 4935 ) 4936 ) 4937 4938 before_with_index = self._index 4939 with_prefix = self._match(TokenType.WITH) 4940 4941 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 4942 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 4943 elements[key].append(cube_or_rollup) 4944 elif grouping_sets := self._parse_grouping_sets(): 4945 elements["grouping_sets"].append(grouping_sets) 4946 elif self._match_text_seq("TOTALS"): 4947 elements["totals"] = True # type: ignore 4948 4949 if before_with_index <= self._index <= before_with_index + 1: 4950 self._retreat(before_with_index) 4951 break 4952 4953 if index == self._index: 4954 break 4955 4956 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4957 4958 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> t.Optional[exp.Cube | exp.Rollup]: 4959 if self._match(TokenType.CUBE): 4960 kind: t.Type[exp.Cube | exp.Rollup] = exp.Cube 4961 elif self._match(TokenType.ROLLUP): 4962 kind = exp.Rollup 4963 else: 4964 return None 4965 4966 return self.expression( 4967 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise) 4968 ) 4969 4970 def _parse_grouping_sets(self) -> t.Optional[exp.GroupingSets]: 4971 if self._match(TokenType.GROUPING_SETS): 4972 return self.expression( 4973 exp.GroupingSets, expressions=self._parse_wrapped_csv(self._parse_grouping_set) 4974 ) 4975 return None 4976 4977 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 
4978 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 4979 4980 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4981 if not skip_having_token and not self._match(TokenType.HAVING): 4982 return None 4983 return self.expression( 4984 exp.Having, comments=self._prev_comments, this=self._parse_disjunction() 4985 ) 4986 4987 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4988 if not self._match(TokenType.QUALIFY): 4989 return None 4990 return self.expression(exp.Qualify, this=self._parse_disjunction()) 4991 4992 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4993 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4994 exp.Prior, this=self._parse_bitwise() 4995 ) 4996 connect = self._parse_disjunction() 4997 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4998 return connect 4999 5000 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 5001 if skip_start_token: 5002 start = None 5003 elif self._match(TokenType.START_WITH): 5004 start = self._parse_disjunction() 5005 else: 5006 return None 5007 5008 self._match(TokenType.CONNECT_BY) 5009 nocycle = self._match_text_seq("NOCYCLE") 5010 connect = self._parse_connect_with_prior() 5011 5012 if not start and self._match(TokenType.START_WITH): 5013 start = self._parse_disjunction() 5014 5015 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 5016 5017 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 5018 this = self._parse_id_var(any_token=True) 5019 if self._match(TokenType.ALIAS): 5020 this = self.expression(exp.Alias, alias=this, this=self._parse_disjunction()) 5021 return this 5022 5023 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 5024 if self._match_text_seq("INTERPOLATE"): 5025 return self._parse_wrapped_csv(self._parse_name_as_expression) 5026 return None 5027 5028 def _parse_order( 5029 self, 
        this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY), attaching it to `this` if given."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a sort-like clause (e.g. SORT BY / CLUSTER BY) into `exp_class`."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_disjunction()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc: t.Optional[bool] = True if self._match(TokenType.DESC) else (False if asc else None)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        # No explicit NULLS ordering: derive it from the dialect's NULL_ORDERING policy
        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                from_=self._match(TokenType.FROM) and self._parse_bitwise(),
                to=self._match_text_seq("TO") and self._parse_bitwise(),
                step=self._match_text_seq("STEP") and self._parse_bitwise(),
                interpolate=self._parse_interpolate(),
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> t.Optional[exp.LimitOptions]:
        """Parse trailing limit options: PERCENT, ROW/ROWS [ONLY], WITH TIES."""
        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")

        if not (percent or rows or with_ties):
            return None

        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT / TOP (per `top`) or a FETCH FIRST/NEXT clause."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

            else:
                # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since
                # we try to build an exp.Mod expr. For that matter, we backtrack and instead
                # consume the factor plus parse the percentage separately
                index = self._index
                expression = self._try_parse(self._parse_term)
                if isinstance(expression, exp.Mod):
                    self._retreat(index)
                    expression = self._parse_factor()
                elif not expression:
                    expression = self._parse_factor()
            # NOTE(review): reconstructed at this indentation (outside the if/else) so
            # `limit_options` is also bound on the TOP path — verify against upstream.
            limit_options = self._parse_limit_options()

            if self._match(TokenType.COMMA):
                # MySQL-style `LIMIT offset, count`
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = (
                self._prev.text.upper()
                if self._match_set((TokenType.FIRST, TokenType.NEXT))
                else "FIRST"
            )

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause, attaching it to `this` if given."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Speculatively check whether a LIMIT/OFFSET clause follows (position is restored)."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)

        # MATCH_CONDITION (...)
is a special construct that should not be consumed by limit/offset 5193 if self._next and self._next.token_type == TokenType.MATCH_CONDITION: 5194 result = False 5195 5196 return result 5197 5198 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 5199 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5200 5201 def _parse_locks(self) -> t.List[exp.Lock]: 5202 locks = [] 5203 while True: 5204 update, key = None, None 5205 if self._match_text_seq("FOR", "UPDATE"): 5206 update = True 5207 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5208 "LOCK", "IN", "SHARE", "MODE" 5209 ): 5210 update = False 5211 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5212 update, key = False, True 5213 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5214 update, key = True, True 5215 else: 5216 break 5217 5218 expressions = None 5219 if self._match_text_seq("OF"): 5220 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5221 5222 wait: t.Optional[bool | exp.Expression] = None 5223 if self._match_text_seq("NOWAIT"): 5224 wait = True 5225 elif self._match_text_seq("WAIT"): 5226 wait = self._parse_primary() 5227 elif self._match_text_seq("SKIP", "LOCKED"): 5228 wait = False 5229 5230 locks.append( 5231 self.expression( 5232 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 5233 ) 5234 ) 5235 5236 return locks 5237 5238 def parse_set_operation( 5239 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 5240 ) -> t.Optional[exp.Expression]: 5241 start = self._index 5242 _, side_token, kind_token = self._parse_join_parts() 5243 5244 side = side_token.text if side_token else None 5245 kind = kind_token.text if kind_token else None 5246 5247 if not self._match_set(self.SET_OPERATIONS): 5248 self._retreat(start) 5249 return None 5250 5251 token_type = self._prev.token_type 5252 5253 if token_type == TokenType.UNION: 5254 operation: t.Type[exp.SetOperation] = exp.Union 
5255 elif token_type == TokenType.EXCEPT: 5256 operation = exp.Except 5257 else: 5258 operation = exp.Intersect 5259 5260 comments = self._prev.comments 5261 5262 if self._match(TokenType.DISTINCT): 5263 distinct: t.Optional[bool] = True 5264 elif self._match(TokenType.ALL): 5265 distinct = False 5266 else: 5267 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5268 if distinct is None: 5269 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5270 5271 by_name = ( 5272 self._match_text_seq("BY", "NAME") 5273 or self._match_text_seq("STRICT", "CORRESPONDING") 5274 or None 5275 ) 5276 if self._match_text_seq("CORRESPONDING"): 5277 by_name = True 5278 if not side and not kind: 5279 kind = "INNER" 5280 5281 on_column_list = None 5282 if by_name and self._match_texts(("ON", "BY")): 5283 on_column_list = self._parse_wrapped_csv(self._parse_column) 5284 5285 expression = self._parse_select( 5286 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5287 ) 5288 5289 return self.expression( 5290 operation, 5291 comments=comments, 5292 this=this, 5293 distinct=distinct, 5294 by_name=by_name, 5295 expression=expression, 5296 side=side, 5297 kind=kind, 5298 on=on_column_list, 5299 ) 5300 5301 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5302 while this: 5303 setop = self.parse_set_operation(this) 5304 if not setop: 5305 break 5306 this = setop 5307 5308 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5309 expression = this.expression 5310 5311 if expression: 5312 for arg in self.SET_OP_MODIFIERS: 5313 expr = expression.args.get(arg) 5314 if expr: 5315 this.set(arg, expr.pop()) 5316 5317 return this 5318 5319 def _parse_expression(self) -> t.Optional[exp.Expression]: 5320 return self._parse_alias(self._parse_assignment()) 5321 5322 def _parse_assignment(self) -> t.Optional[exp.Expression]: 5323 this = self._parse_disjunction() 5324 if not this and 
self._next and self._next.token_type in self.ASSIGNMENT: 5325 # This allows us to parse <non-identifier token> := <expr> 5326 this = exp.column( 5327 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5328 ) 5329 5330 while self._match_set(self.ASSIGNMENT): 5331 if isinstance(this, exp.Column) and len(this.parts) == 1: 5332 this = this.this 5333 5334 this = self.expression( 5335 self.ASSIGNMENT[self._prev.token_type], 5336 this=this, 5337 comments=self._prev_comments, 5338 expression=self._parse_assignment(), 5339 ) 5340 5341 return this 5342 5343 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 5344 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 5345 5346 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 5347 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 5348 5349 def _parse_equality(self) -> t.Optional[exp.Expression]: 5350 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 5351 5352 def _parse_comparison(self) -> t.Optional[exp.Expression]: 5353 return self._parse_tokens(self._parse_range, self.COMPARISON) 5354 5355 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5356 this = this or self._parse_bitwise() 5357 negate = self._match(TokenType.NOT) 5358 5359 if self._match_set(self.RANGE_PARSERS): 5360 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5361 if not expression: 5362 return this 5363 5364 this = expression 5365 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5366 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5367 5368 # Postgres supports ISNULL and NOTNULL for conditions. 
5369 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5370 if self._match(TokenType.NOTNULL): 5371 this = self.expression(exp.Is, this=this, expression=exp.Null()) 5372 this = self.expression(exp.Not, this=this) 5373 5374 if negate: 5375 this = self._negate_range(this) 5376 5377 if self._match(TokenType.IS): 5378 this = self._parse_is(this) 5379 5380 return this 5381 5382 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5383 if not this: 5384 return this 5385 5386 return self.expression(exp.Not, this=this) 5387 5388 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5389 index = self._index - 1 5390 negate = self._match(TokenType.NOT) 5391 5392 if self._match_text_seq("DISTINCT", "FROM"): 5393 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5394 return self.expression(klass, this=this, expression=self._parse_bitwise()) 5395 5396 if self._match(TokenType.JSON): 5397 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5398 5399 if self._match_text_seq("WITH"): 5400 _with = True 5401 elif self._match_text_seq("WITHOUT"): 5402 _with = False 5403 else: 5404 _with = None 5405 5406 unique = self._match(TokenType.UNIQUE) 5407 self._match_text_seq("KEYS") 5408 expression: t.Optional[exp.Expression] = self.expression( 5409 exp.JSON, 5410 this=kind, 5411 with_=_with, 5412 unique=unique, 5413 ) 5414 else: 5415 expression = self._parse_null() or self._parse_bitwise() 5416 if not expression: 5417 self._retreat(index) 5418 return None 5419 5420 this = self.expression(exp.Is, this=this, expression=expression) 5421 this = self.expression(exp.Not, this=this) if negate else this 5422 return self._parse_column_ops(this) 5423 5424 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5425 unnest = self._parse_unnest(with_alias=False) 5426 if unnest: 5427 this = self.expression(exp.In, this=this, unnest=unnest) 5428 elif 
self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5429 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5430 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5431 5432 if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query): 5433 this = self.expression( 5434 exp.In, 5435 this=this, 5436 query=self._parse_query_modifiers(query).subquery(copy=False), 5437 ) 5438 else: 5439 this = self.expression(exp.In, this=this, expressions=expressions) 5440 5441 if matched_l_paren: 5442 self._match_r_paren(this) 5443 elif not self._match(TokenType.R_BRACKET, expression=this): 5444 self.raise_error("Expecting ]") 5445 else: 5446 this = self.expression(exp.In, this=this, field=self._parse_column()) 5447 5448 return this 5449 5450 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5451 symmetric = None 5452 if self._match_text_seq("SYMMETRIC"): 5453 symmetric = True 5454 elif self._match_text_seq("ASYMMETRIC"): 5455 symmetric = False 5456 5457 low = self._parse_bitwise() 5458 self._match(TokenType.AND) 5459 high = self._parse_bitwise() 5460 5461 return self.expression( 5462 exp.Between, 5463 this=this, 5464 low=low, 5465 high=high, 5466 symmetric=symmetric, 5467 ) 5468 5469 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5470 if not self._match(TokenType.ESCAPE): 5471 return this 5472 return self.expression( 5473 exp.Escape, this=this, expression=self._parse_string() or self._parse_null() 5474 ) 5475 5476 def _parse_interval_span(self, this: exp.Expression) -> exp.Interval: 5477 # handle day-time format interval span with omitted units: 5478 # INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`> 5479 interval_span_units_omitted = None 5480 if ( 5481 this 5482 and this.is_string 5483 and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT 5484 and exp.INTERVAL_DAY_TIME_RE.match(this.name) 5485 ): 5486 index = self._index 5487 5488 # Var "TO" 
    def _parse_interval(self, require_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL literal, e.g. INTERVAL '5' DAY.

        Args:
            require_interval: when False, the INTERVAL keyword itself is optional
                (used to parse the tail of a chained sum of interval literals).

        Returns:
            An exp.Interval, an exp.Add summing adjacent interval literals, or
            None (with the token cursor restored to where it started).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and require_interval:
            return None

        # Prefer the primary parser for string values so e.g. '5 days' is kept whole
        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out when what follows can't be an interval: either nothing parsed,
        # or we got a bare unqualified/unquoted column that isn't followed by a
        # valid interval unit (i.e. INTERVAL was probably just an identifier).
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and self._curr
            and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
        ):
            self._retreat(index)
            return None

        interval = self._parse_interval_span(this)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(exp.Add, this=interval, expression=self._parse_interval(False))

        self._retreat(index)
        return interval
expr = this.expression 5613 5614 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5615 # fallback to Identifier / Var 5616 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5617 ident = expr.this 5618 if isinstance(ident, exp.Identifier): 5619 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5620 5621 return this 5622 5623 def _parse_factor(self) -> t.Optional[exp.Expression]: 5624 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5625 this = self._parse_at_time_zone(parse_method()) 5626 5627 while self._match_set(self.FACTOR): 5628 klass = self.FACTOR[self._prev.token_type] 5629 comments = self._prev_comments 5630 expression = parse_method() 5631 5632 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5633 self._retreat(self._index - 1) 5634 return this 5635 5636 this = self.expression(klass, this=this, comments=comments, expression=expression) 5637 5638 if isinstance(this, exp.Div): 5639 this.set("typed", self.dialect.TYPED_DIVISION) 5640 this.set("safe", self.dialect.SAFE_DIVISION) 5641 5642 return this 5643 5644 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5645 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5646 5647 def _parse_unary(self) -> t.Optional[exp.Expression]: 5648 if self._match_set(self.UNARY_PARSERS): 5649 return self.UNARY_PARSERS[self._prev.token_type](self) 5650 return self._parse_type() 5651 5652 def _parse_type( 5653 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5654 ) -> t.Optional[exp.Expression]: 5655 if interval := parse_interval and self._parse_interval(): 5656 return self._parse_column_ops(interval) 5657 5658 index = self._index 5659 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5660 5661 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 
5662 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5663 if isinstance(data_type, exp.Cast): 5664 # This constructor can contain ops directly after it, for instance struct unnesting: 5665 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5666 return self._parse_column_ops(data_type) 5667 5668 if data_type: 5669 index2 = self._index 5670 this = self._parse_primary() 5671 5672 if isinstance(this, exp.Literal): 5673 literal = this.name 5674 this = self._parse_column_ops(this) 5675 5676 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5677 if parser: 5678 return parser(self, this, data_type) 5679 5680 if ( 5681 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5682 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5683 and TIME_ZONE_RE.search(literal) 5684 ): 5685 data_type = exp.DataType.build("TIMESTAMPTZ") 5686 5687 return self.expression(exp.Cast, this=this, to=data_type) 5688 5689 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5690 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5691 # 5692 # If the index difference here is greater than 1, that means the parser itself must have 5693 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5694 # 5695 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5696 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5697 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5698 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5699 # 5700 # In these cases, we don't really want to return the converted type, but instead retreat 5701 # and try to parse a Column or Identifier in the section below. 
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested) data type, e.g. INT, DECIMAL(38, 0), ARRAY<STRUCT<...>>.

        Args:
            check_func: require a following string literal before committing to a
                parenthesized type (used to disambiguate types from function calls).
            schema: parsing inside a schema definition (affects fixed-size arrays).
            allow_identifiers: allow a plain identifier to be re-tokenized as a type.

        Returns:
            The parsed type expression, or None with the cursor restored.
        """
        index = self._index
        this: t.Optional[exp.Expression] = None

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            # Not a known type token: maybe an identifier that tokenizes to a
            # type (quoted/odd casing), or a user-defined type.
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        # Parenthesized arguments: struct fields, nested types, enum values,
        # aggregate state types, or plain size parameters like DECIMAL(38, 0).
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # A parenthesized form could equally be a function call; remember
            # that so check_func can disambiguate below.
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracket form for nested types, e.g. ARRAY<INT>, STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. ARRAY<INT>[1, 2]
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_disjunction)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            if self._curr and self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS:
                unit = self._parse_var(upper=True)
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        # When requested, only treat "NAME(...)" as a type if a string literal
        # follows (e.g. a cast-style constructor); otherwise it's a function call.
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.name} to unsigned.")

                type_token = unsigned_type_token or type_token

            # NULLABLE without parentheses can be a column (Presto/Trino)
            if type_token == TokenType.NULLABLE and not expressions:
                self._retreat(index)
                return None

            this = exp.DataType(
                this=exp.DataType.Type[type_token.name],
                expressions=expressions,
                nested=nested,
            )

        # Empty arrays/structs are allowed
        if values is not None:
            cls = exp.Struct if is_struct else exp.Array
            this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_disjunction) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS
                    or datatype_token == TokenType.ARRAY
                    or not self._match(TokenType.R_BRACKET, advance=False)
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
like `STRUCT<list ARRAY<...>>` where the identifier is also a 5997 # type token. Without this, the list will be parsed as a type and we'll eventually crash 5998 this = self._parse_id_var() 5999 else: 6000 this = ( 6001 self._parse_type(parse_interval=False, fallback_to_identifier=True) 6002 or self._parse_id_var() 6003 ) 6004 6005 self._match(TokenType.COLON) 6006 6007 if ( 6008 type_required 6009 and not isinstance(this, exp.DataType) 6010 and not self._match_set(self.TYPE_TOKENS, advance=False) 6011 ): 6012 self._retreat(index) 6013 return self._parse_types() 6014 6015 return self._parse_column_def(this) 6016 6017 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6018 if not self._match_text_seq("AT", "TIME", "ZONE"): 6019 return this 6020 return self._parse_at_time_zone( 6021 self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 6022 ) 6023 6024 def _parse_column(self) -> t.Optional[exp.Expression]: 6025 this = self._parse_column_reference() 6026 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 6027 6028 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 6029 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6030 6031 return column 6032 6033 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 6034 this = self._parse_field() 6035 if ( 6036 not this 6037 and self._match(TokenType.VALUES, advance=False) 6038 and self.VALUES_FOLLOWED_BY_PAREN 6039 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6040 ): 6041 this = self._parse_id_var() 6042 6043 if isinstance(this, exp.Identifier): 6044 # We bubble up comments from the Identifier to the Column 6045 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 6046 6047 return this 6048 6049 def _parse_colon_as_variant_extract( 6050 self, this: t.Optional[exp.Expression] 6051 ) -> t.Optional[exp.Expression]: 6052 casts = [] 6053 json_path = [] 6054 escape = None 6055 6056 
        # Consume one `:`-delimited path segment per iteration, accumulating the raw
        # SQL text of each segment into json_path
        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Find the first `::` after the segment start; the path text ends right before it
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

        # Re-apply the `::` casts that were peeled off of the path, innermost first
        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # The right-hand side of a `::` cast is a type
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply postfix column operators (dots, casts, brackets, ...) to `this`."""
        this = self._parse_bracket(this)

        column_operators = self.COLUMN_OPERATORS
        cast_column_operators = self.CAST_COLUMN_OPERATORS
        while self._curr:
            op_token = self._curr.token_type

            if op_token not in column_operators:
                break
            op = column_operators[op_token]
            self._advance()

            if op_token in cast_column_operators:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bitwise()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the parsed name parts one slot: column/table/db become
                # table/db/catalog as another dotted part is appended
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)
        return this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized expression: empty/valued tuple, subquery, or exp.Paren."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = seq_get(expressions, 0)

        if not this and self._match(TokenType.R_PAREN, advance=False):
            # `()` is an empty tuple
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, (exp.Subquery, exp.Values)):
            this = self._parse_subquery(
                this=self._parse_query_modifiers(self._parse_set_operations(this)),
                parse_alias=False,
            )
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)

        if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc):
            # A parenthesized aggregate may be followed by an OVER clause
            return self._parse_window(this)

        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: a literal, adjacent-string concat, or parens."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> 'ab'
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(
                        exp.Concat, expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE
                    )

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # Leading-dot decimal literal, e.g. `.25`
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token:
            bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_args(self, alias: bool = False) -> t.List[exp.Expression]:
        # Each argument may itself be a lambda expression
        return self._parse_csv(lambda: self._parse_lambda(alias=alias))

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly parenthesis-less) function call at the current token."""
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume both the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Some builders accept a `dialect` kwarg; only pass it when supported
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)

            this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Hook for dialects to rewrite positional args into PropertyEQ; no-op by default
        return expression

    def _kv_to_prop_eq(
        self, expressions:
        t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ nodes) into exp.PropertyEQ."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    # Unwrap the column so only its identifier remains as the key
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        # The body of a user-defined function is an arbitrary statement
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF signature: qualified name plus an optional parameter list."""
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        # e.g. MySQL character set introducers such as _utf8'abc'
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, token=token, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            # Dotted form: the first part is the parameter's kind/namespace
            kind = this.name
            this = self._parse_var() or self._parse_primary()
        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression, falling back to a regular expression or SELECT."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda after all; rewind and parse as a normal expression
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_disjunction)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint definition list into exp.Schema."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse the type and constraints that follow a column name."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_disjunction(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False):
            # Procedure-style IN/OUT parameter markers
            in_out_constraint = self.expression(
                exp.InOutColumnConstraint,
                input_=self._match(TokenType.IN),
                output=self._match(TokenType.OUT),
            )
            constraints.append(in_out_constraint)
            kind = self._parse_types()
        elif (
            kind
            and
            self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        # Collect any remaining column constraints
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTO_INCREMENT options, upgrading to an identity constraint when both
        start and increment values are present."""
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_check_constraint(self) -> t.Optional[exp.CheckColumnConstraint]:
        if not self._match(TokenType.L_PAREN, advance=False):
            return None

        return self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        )

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # Rewind the token that triggered this parser
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED [ALWAYS | BY DEFAULT] AS ... column constraints."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare `(start, increment)` shorthand
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None

        # `WITH <procedure option>` must not be mistaken for a constraint keyword
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)
            if not constraint:
                self._retreat(self._index - 1)
                return None

            return self.expression(exp.ColumnConstraint, this=this, kind=constraint)

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        constraint = self.CONSTRAINT_PARSERS[constraint](self)
        if not constraint:
            # The dedicated parser bailed; rewind to before the keyword
            self._retreat(index)

        return constraint

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Parse trailing key-constraint options (`ON <event> <action>` plus keyword
        options) into a list of plain strings."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        # NOTE(review): `expressions` is always None here — referenced columns don't
        # appear to be parsed by this method; confirm whether that's intentional
        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY [(cols)] REFERENCES ... [ON DELETE/UPDATE <action>]."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            # Rewind the token that triggered this parser
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self,
        wrapped_optional: bool = False,
        in_props: bool = False,
        named_primary_key: bool = False,
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._prev.token_type == TokenType.DESC
            if self._match_set((TokenType.ASC, TokenType.DESC))
            else None
        )

        this = None
        if (
            named_primary_key
            # NOTE(review): assumes self._curr is not None when named_primary_key is set
            and self._curr.text.upper() not in self.CONSTRAINT_PARSERS
            and self._next
            and self._next.token_type == TokenType.L_PAREN
        ):
            this = self._parse_id_var()

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            # Column-level constraint: no wrapped column list follows
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            this=this,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format.
        We parse the column into the corresponding types, for example `{d'yyyy-mm-dd'}` will be
        parsed as a `Date` column, exactly the same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse `[...]` / `{...}` constructs: subscripts, arrays, structs, ODBC literals."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            # Peek at the token right before the bracket to detect a MAP constructor
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a slice suffix `:end[:step]` applied to `this`."""
        if not self._match(TokenType.COLON):
            return this

        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
            # A dash immediately followed by a colon stands for -1 as the slice end
            self._advance()
            end: t.Optional[exp.Expression] = -exp.Literal.number("1")
        else:
            end = self._parse_assignment()

        step = self._parse_unary() if self._match(TokenType.COLON) else None
        return self.expression(exp.Slice, this=this, expression=end, step=step)

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [operand] WHEN ... THEN ... [ELSE ...] END expression."""
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_disjunction()

        while self._match(TokenType.WHEN):
            this = self._parse_disjunction()
            self._match(TokenType.THEN)
            then = self._parse_disjunction()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_disjunction()

        if not self._match(TokenType.END):
            # `ELSE interval` can swallow END as the interval's unit; recover it here
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse the IF(...) function form or the IF ... THEN ... [ELSE ...] END form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in some dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_disjunction()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_disjunction()
            false = self._parse_disjunction() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            # Rewind the token that triggered this parser
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        # EXTRACT(part FROM expr) or the comma-separated variant
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
self._match(TokenType.TABLE) 7071 this = self._parse_table() 7072 7073 self._match(TokenType.COMMA) 7074 args = [this, *self._parse_csv(self._parse_lambda)] 7075 7076 gap_fill = exp.GapFill.from_arg_list(args) 7077 return self.validate_expression(gap_fill, args) 7078 7079 def _parse_char(self) -> exp.Chr: 7080 return self.expression( 7081 exp.Chr, 7082 expressions=self._parse_csv(self._parse_assignment), 7083 charset=self._match(TokenType.USING) and self._parse_var(), 7084 ) 7085 7086 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 7087 this = self._parse_assignment() 7088 7089 if not self._match(TokenType.ALIAS): 7090 if self._match(TokenType.COMMA): 7091 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 7092 7093 self.raise_error("Expected AS after CAST") 7094 7095 fmt = None 7096 to = self._parse_types() 7097 7098 default = None 7099 if self._match(TokenType.DEFAULT): 7100 default = self._parse_bitwise() 7101 self._match_text_seq("ON", "CONVERSION", "ERROR") 7102 7103 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7104 fmt_string = self._parse_string() 7105 fmt = self._parse_at_time_zone(fmt_string) 7106 7107 if not to: 7108 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 7109 if to.this in exp.DataType.TEMPORAL_TYPES: 7110 this = self.expression( 7111 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 7112 this=this, 7113 format=exp.Literal.string( 7114 format_time( 7115 fmt_string.this if fmt_string else "", 7116 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7117 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7118 ) 7119 ), 7120 safe=safe, 7121 ) 7122 7123 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7124 this.set("zone", fmt.args["zone"]) 7125 return this 7126 elif not to: 7127 self.raise_error("Expected TYPE after CAST") 7128 elif isinstance(to, exp.Identifier): 7129 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 7130 elif to.this == exp.DataType.Type.CHAR: 7131 if self._match(TokenType.CHARACTER_SET): 7132 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 7133 7134 return self.build_cast( 7135 strict=strict, 7136 this=this, 7137 to=to, 7138 format=fmt, 7139 safe=safe, 7140 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7141 default=default, 7142 ) 7143 7144 def _parse_string_agg(self) -> exp.GroupConcat: 7145 if self._match(TokenType.DISTINCT): 7146 args: t.List[t.Optional[exp.Expression]] = [ 7147 self.expression(exp.Distinct, expressions=[self._parse_disjunction()]) 7148 ] 7149 if self._match(TokenType.COMMA): 7150 args.extend(self._parse_csv(self._parse_disjunction)) 7151 else: 7152 args = self._parse_csv(self._parse_disjunction) # type: ignore 7153 7154 if self._match_text_seq("ON", "OVERFLOW"): 7155 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7156 if self._match_text_seq("ERROR"): 7157 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 7158 else: 7159 self._match_text_seq("TRUNCATE") 7160 on_overflow = self.expression( 7161 exp.OverflowTruncateBehavior, 7162 this=self._parse_string(), 7163 with_count=( 7164 self._match_text_seq("WITH", "COUNT") 7165 or not self._match_text_seq("WITHOUT", "COUNT") 7166 ), 7167 ) 7168 else: 7169 on_overflow = None 7170 7171 index = self._index 7172 if not self._match(TokenType.R_PAREN) and args: 7173 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7174 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 7175 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7176 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7177 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 7178 7179 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7180 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7181 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7182 if not self._match_text_seq("WITHIN", "GROUP"): 7183 self._retreat(index) 7184 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7185 7186 # The corresponding match_r_paren will be called in parse_function (caller) 7187 self._match_l_paren() 7188 7189 return self.expression( 7190 exp.GroupConcat, 7191 this=self._parse_order(this=seq_get(args, 0)), 7192 separator=seq_get(args, 1), 7193 on_overflow=on_overflow, 7194 ) 7195 7196 def _parse_convert( 7197 self, strict: bool, safe: t.Optional[bool] = None 7198 ) -> t.Optional[exp.Expression]: 7199 this = self._parse_bitwise() 7200 7201 if self._match(TokenType.USING): 7202 to: t.Optional[exp.Expression] = self.expression( 7203 exp.CharacterSet, this=self._parse_var(tokens={TokenType.BINARY}) 7204 ) 7205 elif self._match(TokenType.COMMA): 7206 to = self._parse_types() 7207 else: 7208 to = None 7209 7210 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7211 7212 def _parse_xml_element(self) -> exp.XMLElement: 7213 if self._match_text_seq("EVALNAME"): 7214 evalname = True 7215 this = self._parse_bitwise() 7216 else: 7217 evalname = None 7218 self._match_text_seq("NAME") 7219 this = self._parse_id_var() 7220 7221 return self.expression( 7222 exp.XMLElement, 7223 this=this, 7224 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7225 evalname=evalname, 7226 ) 7227 
    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse the arguments of XMLTABLE(...), including XMLNAMESPACES,
        PASSING, RETURNING SEQUENCE BY REF and COLUMNS clauses."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list; DEFAULT entries have no alias."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """Parse DECODE's arguments: with fewer than 3 args it's a charset decode,
        otherwise it's Oracle's CASE-like DECODE."""
        args = self._parse_csv(self._parse_disjunction)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse a `[KEY] k <sep> [VALUE] v` pair, e.g. inside JSON_OBJECT."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when it's followed by FORMAT JSON."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the ON EMPTY / ON ERROR / ON NULL handling clauses of JSON functions."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        """Parse one handling clause: returns the string "<value> ON <on>" for a
        keyword handler, the default expression for `DEFAULT <expr> ON <on>`, or
        None when nothing matches (the cursor is restored in that case)."""
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT(...) / JSON_OBJECTAGG(...) arguments: key-value pairs
        (or `*`), NULL handling, [WITH|WITHOUT] UNIQUE KEYS, RETURNING and ENCODING."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        """Parse one column definition inside a JSON_TABLE COLUMNS clause,
        including NESTED ... COLUMNS(...) sub-schemas."""
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY)
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            ordinality = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
            ordinality=ordinality,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a `COLUMNS (...)` schema of JSON column definitions."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse the arguments of JSON_TABLE(doc [, path] ... COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH(col, ...) AGAINST('query' [modifier]) full-text search syntax."""
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        """Parse OPENJSON(doc [, path]) [WITH (column defs)]."""
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (...) clause: name, type, optional path, AS JSON
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION/LOCATE-style arguments; `haystack_first` controls which
        positional argument is the searched string."""
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            # POSITION(needle IN haystack)
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # (despite being undocumented, the reverse order also works)
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        start, length = None, None

        while self._curr:
            if self._match(TokenType.FROM):
                start = self._parse_bitwise()
            elif self._match(TokenType.FOR):
                if not start:
                    # FOR without FROM implies starting at position 1
                    start = exp.Literal.number(1)
                length = self._parse_bitwise()
            else:
                break

        if start:
            args.append(start)
        if length:
            args.append(length)

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            # LEADING / TRAILING / BOTH
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # TRIM(chars FROM str) puts the pattern first; swap so `this` is the string
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a `WINDOW name AS (...), ...` clause, or return None if absent."""
        return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse one `name AS (window spec)` entry of a WINDOW clause."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if the keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX expr` / `HAVING MIN expr` qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function suffixes after `this`: WITHIN GROUP, FILTER,
        IGNORE/RESPECT NULLS and OVER (...). With `alias=True`, parses a named
        window definition (`name AS (...)`) instead of an OVER clause."""
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the agg's
                # argument to wrap the whole aggregate call
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER window_name (reference to a named window, no parens)
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first: t.Optional[bool] = True if self._match(TokenType.FIRST) else None
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound, returning its value and side (PRECEDING/FOLLOWING)."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an alias (or parenthesized alias list) after `this`.

        Args:
            explicit: if True, only accept an alias introduced by AS.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or treat the next (non-reserved) token as one."""
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal via the registered STRING_PARSERS."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        output = exp.to_identifier(
            self._prev.text if self._match(TokenType.STRING) else None, quoted=True
        )
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal via the registered NUMERIC_PARSERS."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a VAR token (or any token / one of `tokens`) into exp.Var."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()
    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        """Consume and return the current token unless it's reserved (or exhausted)."""
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a var built from any token."""
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression, falling back to a var built from any token."""
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        """Parse NULL (or UNKNOWN, which is normalized to the NULL parser)."""
        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        """Parse a TRUE/FALSE literal."""
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        """Parse a `*` token."""
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        """Parse the name/value of a parameter reference."""
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        """Parse a placeholder (e.g. `?`, `:name`); rewinds if the registered
        parser matched the token but produced nothing."""
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse a star-modifier such as EXCEPT/REPLACE after `*`, given its keyword(s)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_alias(self._parse_disjunction(), explicit=True)
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list using `parse_method`, skipping None results."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach the separator's comments to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-associatively fold a binary-operator chain: while the next token is
        a key of `expressions`, wrap the result in the mapped expression type."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list using `parse_method`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; parens are required unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse an expression (optionally aliased, with set operations) or a SELECT."""
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START TRANSACTION with optional kind and mode list."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            # A mode is a run of VAR/NOT tokens, e.g. "READ ONLY", "NOT DEFERRABLE"
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK with optional TO SAVEPOINT and AND [NO] CHAIN."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh | exp.Command:
        """Parse REFRESH [TABLE | MATERIALIZED VIEW] target; falls back to an
        opaque command when the target form isn't recognized."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match_text_seq("MATERIALIZED", "VIEW"):
            kind = "MATERIALIZED VIEW"
        else:
            kind = ""

        this = self._parse_string() or self._parse_table()
        if not kind and not isinstance(this, exp.Literal):
            return self._parse_as_command(self._prev)

        return self.expression(exp.Refresh, this=this, kind=kind)

    def _parse_column_def_with_exists(self) -> t.Optional[exp.ColumnDef]:
        """Parse `[COLUMN] [IF NOT EXISTS] <column def>`, rewinding on failure."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse an ADD COLUMN alteration, including FIRST/AFTER positioning."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] alteration, defaulting the drop kind to COLUMN."""
        drop = self._parse_drop() if self._match(TokenType.DROP) else None
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        """Parse the partition list of ALTER TABLE ... DROP PARTITION."""
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the ADD alterations of ALTER TABLE: constraints, columns or partitions."""
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One comma-separated ADD item: constraint, column def, or partition
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # ADD [COLUMNS] (col1 type, col2 type, ...) or a bare column list
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_disjunction())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
this=column, 8076 allow_null=False, 8077 ) 8078 8079 if self._match_text_seq("SET", "VISIBLE"): 8080 return self.expression(exp.AlterColumn, this=column, visible="VISIBLE") 8081 if self._match_text_seq("SET", "INVISIBLE"): 8082 return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE") 8083 8084 self._match_text_seq("SET", "DATA") 8085 self._match_text_seq("TYPE") 8086 return self.expression( 8087 exp.AlterColumn, 8088 this=column, 8089 dtype=self._parse_types(), 8090 collate=self._match(TokenType.COLLATE) and self._parse_term(), 8091 using=self._match(TokenType.USING) and self._parse_disjunction(), 8092 ) 8093 8094 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 8095 if self._match_texts(("ALL", "EVEN", "AUTO")): 8096 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 8097 8098 self._match_text_seq("KEY", "DISTKEY") 8099 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 8100 8101 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 8102 if compound: 8103 self._match_text_seq("SORTKEY") 8104 8105 if self._match(TokenType.L_PAREN, advance=False): 8106 return self.expression( 8107 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 8108 ) 8109 8110 self._match_texts(("AUTO", "NONE")) 8111 return self.expression( 8112 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 8113 ) 8114 8115 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 8116 index = self._index - 1 8117 8118 partition_exists = self._parse_exists() 8119 if self._match(TokenType.PARTITION, advance=False): 8120 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 8121 8122 self._retreat(index) 8123 return self._parse_csv(self._parse_drop_column) 8124 8125 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 8126 if self._match(TokenType.COLUMN) or not 
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the payload of an ALTER TABLE ... SET statement.

        Dispatches over several dialect-specific forms (Hive table properties,
        Postgres LOGGED/UNLOGGED, Snowflake stage options/tags, T-SQL
        FILESTREAM_ON, etc.) and populates the corresponding arg on a single
        `exp.AlterSet` node. The order of the `elif` branches matters: longer
        keyword sequences must be tried before their prefixes.
        """
        alter_set = self.expression(exp.AlterSet)

        # SET (...) or SET TABLE PROPERTIES (...): a wrapped assignment list.
        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback (e.g. Hive SET SERDE ... [WITH SERDEPROPERTIES (...)]).
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse an ANALYZE statement across several dialects.

        Handles bare ANALYZE (DuckDB), ANALYZE TABLE/INDEX/TABLES/DATABASE/
        CLUSTER targets, optional PARTITION clauses, StarRocks sync/async
        modes, and dialect-specific inner expressions (statistics, histogram,
        validate, ...). Falls back to a raw Command when a PARTITION-like
        keyword follows but no partition could be parsed.
        """
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        # Leading style options, e.g. VERBOSE or BUFFER_USAGE_LIMIT <n>.
        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        # Tentative kind from the upcoming token; cleared below when the
        # statement turns out to have no explicit target kind.
        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # _index - 2 points back at the SYNC/ASYNC token just consumed.
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )
    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE REF UPDATE / VALIDATE STRUCTURE.

        `kind` captures REF or STRUCTURE, `this` the variant text, and
        `expression` an optional INTO target (only for CASCADE COMPLETE).
        """
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)
    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse ANALYZE ... UPDATE/DROP HISTOGRAM ON <cols> clauses.

        Supports MySQL's `WITH <n> BUCKETS` / `USING DATA '<json>'` and
        StarRocks' `WITH SYNC|ASYNC MODE` plus MANUAL/AUTO update options.
        """
        # `this` is the verb that preceded this parser (e.g. UPDATE or DROP).
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    # MySQL form: WITH <n> BUCKETS
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )
    def _parse_when_matched(self) -> exp.Whens:
        """Parse the WHEN [NOT] MATCHED [BY SOURCE|TARGET] clauses of a MERGE.

        Each clause yields an `exp.When` with:
        - matched: False iff NOT was present
        - source:  True for BY SOURCE, False for BY TARGET or when absent
        - condition: the optional AND <predicate>
        - then: an Insert/Update/DELETE-var/conflict-action node
        """
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            # BY TARGET must be tried first so it isn't mistaken for BY SOURCE.
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_disjunction() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                # INSERT * (Databricks) vs INSERT (cols) VALUES (...) vs INSERT ROW.
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                # UPDATE * (Databricks) vs UPDATE SET a = b, ...
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)
self._index 8491 8492 if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"): 8493 return self._parse_set_transaction(global_=kind == "GLOBAL") 8494 8495 left = self._parse_primary() or self._parse_column() 8496 assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS) 8497 8498 if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter): 8499 self._retreat(index) 8500 return None 8501 8502 right = self._parse_statement() or self._parse_id_var() 8503 if isinstance(right, (exp.Column, exp.Identifier)): 8504 right = exp.var(right.name) 8505 8506 this = self.expression(exp.EQ, this=left, expression=right) 8507 return self.expression(exp.SetItem, this=this, kind=kind) 8508 8509 def _parse_set_transaction(self, global_: bool = False) -> exp.Expression: 8510 self._match_text_seq("TRANSACTION") 8511 characteristics = self._parse_csv( 8512 lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS) 8513 ) 8514 return self.expression( 8515 exp.SetItem, 8516 expressions=characteristics, 8517 kind="TRANSACTION", 8518 global_=global_, 8519 ) 8520 8521 def _parse_set_item(self) -> t.Optional[exp.Expression]: 8522 parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE) 8523 return parser(self) if parser else self._parse_set_item_assignment(kind=None) 8524 8525 def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command: 8526 index = self._index 8527 set_ = self.expression( 8528 exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag 8529 ) 8530 8531 if self._curr: 8532 self._retreat(index) 8533 return self._parse_as_command(self._prev) 8534 8535 return set_ 8536 8537 def _parse_var_from_options( 8538 self, options: OPTIONS_TYPE, raise_unmatched: bool = True 8539 ) -> t.Optional[exp.Var]: 8540 start = self._curr 8541 if not start: 8542 return None 8543 8544 option = start.text.upper() 8545 continuations = options.get(option) 8546 8547 index = self._index 
8548 self._advance() 8549 for keywords in continuations or []: 8550 if isinstance(keywords, str): 8551 keywords = (keywords,) 8552 8553 if self._match_text_seq(*keywords): 8554 option = f"{option} {' '.join(keywords)}" 8555 break 8556 else: 8557 if continuations or continuations is None: 8558 if raise_unmatched: 8559 self.raise_error(f"Unknown option {option}") 8560 8561 self._retreat(index) 8562 return None 8563 8564 return exp.var(option) 8565 8566 def _parse_as_command(self, start: Token) -> exp.Command: 8567 while self._curr: 8568 self._advance() 8569 text = self._find_sql(start, self._prev) 8570 size = len(start.text) 8571 self._warn_unsupported() 8572 return exp.Command(this=text[:size], expression=text[size:]) 8573 8574 def _parse_dict_property(self, this: str) -> exp.DictProperty: 8575 settings = [] 8576 8577 self._match_l_paren() 8578 kind = self._parse_id_var() 8579 8580 if self._match(TokenType.L_PAREN): 8581 while True: 8582 key = self._parse_id_var() 8583 value = self._parse_primary() 8584 if not key and value is None: 8585 break 8586 settings.append(self.expression(exp.DictSubProperty, this=key, value=value)) 8587 self._match(TokenType.R_PAREN) 8588 8589 self._match_r_paren() 8590 8591 return self.expression( 8592 exp.DictProperty, 8593 this=this, 8594 kind=kind.this if kind else None, 8595 settings=settings, 8596 ) 8597 8598 def _parse_dict_range(self, this: str) -> exp.DictRange: 8599 self._match_l_paren() 8600 has_min = self._match_text_seq("MIN") 8601 if has_min: 8602 min = self._parse_var() or self._parse_primary() 8603 self._match_text_seq("MAX") 8604 max = self._parse_var() or self._parse_primary() 8605 else: 8606 max = self._parse_var() or self._parse_primary() 8607 min = exp.Literal.number(0) 8608 self._match_r_paren() 8609 return self.expression(exp.DictRange, this=this, min=min, max=max) 8610 8611 def _parse_comprehension( 8612 self, this: t.Optional[exp.Expression] 8613 ) -> t.Optional[exp.Comprehension]: 8614 index = self._index 8615 
    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        """Parse a dollar-quoted (heredoc) string, e.g. $$...$$ or $tag$...$tag$.

        Returns None if the current position does not start a heredoc. Raises
        via `raise_error` when an opening tag has no matching closing tag.
        """
        # Tokenizer-level heredoc strings come through as a single token.
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        # The opening tag must be contiguous ($tag$, no whitespace inside).
        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            # We consumed a named tag; it must be closed by another "$".
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        # Scan forward until the full tag sequence reappears.
        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        # Reached only if raise_error does not raise (lenient error levels).
        return None
    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite column references in a lambda body to plain identifiers.

        `expressions` are the lambda's parameters; a parameter may carry a
        `to` type, in which case references to it are wrapped in a Cast.
        Returns the (possibly replaced) root node.
        """
        if not node:
            return node

        # Map parameter name -> cast target (or False when untyped).
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                # Preserve qualified access (x.field) as a Dot chain.
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # If the column sits inside a Dot chain, replace the
                # outermost Dot so the whole chain is rewritten once.
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
is_db_reference=is_database) 8753 ) 8754 8755 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8756 8757 if self._match_text_seq("RESTART", "IDENTITY"): 8758 identity = "RESTART" 8759 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8760 identity = "CONTINUE" 8761 else: 8762 identity = None 8763 8764 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8765 option = self._prev.text 8766 else: 8767 option = None 8768 8769 partition = self._parse_partition() 8770 8771 # Fallback case 8772 if self._curr: 8773 return self._parse_as_command(start) 8774 8775 return self.expression( 8776 exp.TruncateTable, 8777 expressions=expressions, 8778 is_database=is_database, 8779 exists=exists, 8780 cluster=cluster, 8781 identity=identity, 8782 option=option, 8783 partition=partition, 8784 ) 8785 8786 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8787 this = self._parse_ordered(self._parse_opclass) 8788 8789 if not self._match(TokenType.WITH): 8790 return this 8791 8792 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 8793 8794 return self.expression(exp.WithOperator, this=this, op=op) 8795 8796 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8797 self._match(TokenType.EQ) 8798 self._match(TokenType.L_PAREN) 8799 8800 opts: t.List[t.Optional[exp.Expression]] = [] 8801 option: exp.Expression | None 8802 while self._curr and not self._match(TokenType.R_PAREN): 8803 if self._match_text_seq("FORMAT_NAME", "="): 8804 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8805 option = self._parse_format_name() 8806 else: 8807 option = self._parse_property() 8808 8809 if option is None: 8810 self.raise_error("Unable to parse option") 8811 break 8812 8813 opts.append(option) 8814 8815 return opts 8816 8817 def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]: 8818 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8819 8820 options = [] 8821 
while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8822 option = self._parse_var(any_token=True) 8823 prev = self._prev.text.upper() 8824 8825 # Different dialects might separate options and values by white space, "=" and "AS" 8826 self._match(TokenType.EQ) 8827 self._match(TokenType.ALIAS) 8828 8829 param = self.expression(exp.CopyParameter, this=option) 8830 8831 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8832 TokenType.L_PAREN, advance=False 8833 ): 8834 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8835 param.set("expressions", self._parse_wrapped_options()) 8836 elif prev == "FILE_FORMAT": 8837 # T-SQL's external file format case 8838 param.set("expression", self._parse_field()) 8839 elif ( 8840 prev == "FORMAT" 8841 and self._prev.token_type == TokenType.ALIAS 8842 and self._match_texts(("AVRO", "JSON")) 8843 ): 8844 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 8845 param.set("expression", self._parse_field()) 8846 else: 8847 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 8848 8849 options.append(param) 8850 8851 if sep: 8852 self._match(sep) 8853 8854 return options 8855 8856 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8857 expr = self.expression(exp.Credentials) 8858 8859 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8860 expr.set("storage", self._parse_field()) 8861 if self._match_text_seq("CREDENTIALS"): 8862 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8863 creds = ( 8864 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8865 ) 8866 expr.set("credentials", creds) 8867 if self._match_text_seq("ENCRYPTION"): 8868 expr.set("encryption", self._parse_wrapped_options()) 8869 if self._match_text_seq("IAM_ROLE"): 8870 expr.set( 8871 "iam_role", 8872 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 8873 ) 8874 if 
    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse a COPY [INTO] statement (Snowflake/Redshift/Postgres style).

        `kind` is True for COPY ... FROM and False for COPY ... TO. Falls back
        to a raw Command when tokens remain unconsumed at the end.
        """
        start = self._prev

        self._match(TokenType.INTO)

        # Target is either a parenthesized query or a table reference.
        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        if self._match(TokenType.EQ, advance=False):
            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
            # list via `_parse_wrapped(..)` below.
            self._advance(-1)
            files = []

        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )
8942 star_token = self._prev 8943 8944 if self._match_text_seq("COLUMNS", "(", advance=False): 8945 this = self._parse_function() 8946 if isinstance(this, exp.Columns): 8947 this.set("unpack", True) 8948 return this 8949 8950 return self.expression( 8951 exp.Star, 8952 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 8953 replace=self._parse_star_op("REPLACE"), 8954 rename=self._parse_star_op("RENAME"), 8955 ).update_positions(star_token) 8956 8957 def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]: 8958 privilege_parts = [] 8959 8960 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 8961 # (end of privilege list) or L_PAREN (start of column list) are met 8962 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 8963 privilege_parts.append(self._curr.text.upper()) 8964 self._advance() 8965 8966 this = exp.var(" ".join(privilege_parts)) 8967 expressions = ( 8968 self._parse_wrapped_csv(self._parse_column) 8969 if self._match(TokenType.L_PAREN, advance=False) 8970 else None 8971 ) 8972 8973 return self.expression(exp.GrantPrivilege, this=this, expressions=expressions) 8974 8975 def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]: 8976 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 8977 principal = self._parse_id_var() 8978 8979 if not principal: 8980 return None 8981 8982 return self.expression(exp.GrantPrincipal, this=principal, kind=kind) 8983 8984 def _parse_grant_revoke_common( 8985 self, 8986 ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]: 8987 privileges = self._parse_csv(self._parse_grant_privilege) 8988 8989 self._match(TokenType.ON) 8990 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 8991 8992 # Attempt to parse the securable e.g. 
MySQL allows names 8993 # such as "foo.*", "*.*" which are not easily parseable yet 8994 securable = self._try_parse(self._parse_table_parts) 8995 8996 return privileges, kind, securable 8997 8998 def _parse_grant(self) -> exp.Grant | exp.Command: 8999 start = self._prev 9000 9001 privileges, kind, securable = self._parse_grant_revoke_common() 9002 9003 if not securable or not self._match_text_seq("TO"): 9004 return self._parse_as_command(start) 9005 9006 principals = self._parse_csv(self._parse_grant_principal) 9007 9008 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9009 9010 if self._curr: 9011 return self._parse_as_command(start) 9012 9013 return self.expression( 9014 exp.Grant, 9015 privileges=privileges, 9016 kind=kind, 9017 securable=securable, 9018 principals=principals, 9019 grant_option=grant_option, 9020 ) 9021 9022 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9023 start = self._prev 9024 9025 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9026 9027 privileges, kind, securable = self._parse_grant_revoke_common() 9028 9029 if not securable or not self._match_text_seq("FROM"): 9030 return self._parse_as_command(start) 9031 9032 principals = self._parse_csv(self._parse_grant_principal) 9033 9034 cascade = None 9035 if self._match_texts(("CASCADE", "RESTRICT")): 9036 cascade = self._prev.text.upper() 9037 9038 if self._curr: 9039 return self._parse_as_command(start) 9040 9041 return self.expression( 9042 exp.Revoke, 9043 privileges=privileges, 9044 kind=kind, 9045 securable=securable, 9046 principals=principals, 9047 grant_option=grant_option, 9048 cascade=cascade, 9049 ) 9050 9051 def _parse_overlay(self) -> exp.Overlay: 9052 def _parse_overlay_arg(text: str) -> t.Optional[exp.Expression]: 9053 return ( 9054 self._parse_bitwise() 9055 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9056 else None 9057 ) 9058 9059 return self.expression( 9060 exp.Overlay, 9061 this=self._parse_bitwise(), 9062 
expression=_parse_overlay_arg("PLACING"), 9063 from_=_parse_overlay_arg("FROM"), 9064 for_=_parse_overlay_arg("FOR"), 9065 ) 9066 9067 def _parse_format_name(self) -> exp.Property: 9068 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9069 # for FILE_FORMAT = <format_name> 9070 return self.expression( 9071 exp.Property, 9072 this=exp.var("FORMAT_NAME"), 9073 value=self._parse_string() or self._parse_table_parts(), 9074 ) 9075 9076 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 9077 args: t.List[exp.Expression] = [] 9078 9079 if self._match(TokenType.DISTINCT): 9080 args.append(self.expression(exp.Distinct, expressions=[self._parse_lambda()])) 9081 self._match(TokenType.COMMA) 9082 9083 args.extend(self._parse_function_args()) 9084 9085 return self.expression( 9086 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 9087 ) 9088 9089 def _identifier_expression( 9090 self, token: t.Optional[Token] = None, **kwargs: t.Any 9091 ) -> exp.Identifier: 9092 return self.expression(exp.Identifier, token=token or self._prev, **kwargs) 9093 9094 def _build_pipe_cte( 9095 self, 9096 query: exp.Query, 9097 expressions: t.List[exp.Expression], 9098 alias_cte: t.Optional[exp.TableAlias] = None, 9099 ) -> exp.Select: 9100 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 9101 if alias_cte: 9102 new_cte = alias_cte 9103 else: 9104 self._pipe_cte_counter += 1 9105 new_cte = f"__tmp{self._pipe_cte_counter}" 9106 9107 with_ = query.args.get("with_") 9108 ctes = with_.pop() if with_ else None 9109 9110 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9111 if ctes: 9112 new_select.set("with_", ctes) 9113 9114 return new_select.with_(new_cte, as_=query, copy=False) 9115 9116 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9117 select = self._parse_select(consume_pipe=False) 9118 if not select: 9119 return query 9120 9121 return 
self._build_pipe_cte( 9122 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9123 ) 9124 9125 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9126 limit = self._parse_limit() 9127 offset = self._parse_offset() 9128 if limit: 9129 curr_limit = query.args.get("limit", limit) 9130 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9131 query.limit(limit, copy=False) 9132 if offset: 9133 curr_offset = query.args.get("offset") 9134 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9135 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9136 9137 return query 9138 9139 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 9140 this = self._parse_disjunction() 9141 if self._match_text_seq("GROUP", "AND", advance=False): 9142 return this 9143 9144 this = self._parse_alias(this) 9145 9146 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9147 return self._parse_ordered(lambda: this) 9148 9149 return this 9150 9151 def _parse_pipe_syntax_aggregate_group_order_by( 9152 self, query: exp.Select, group_by_exists: bool = True 9153 ) -> exp.Select: 9154 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9155 aggregates_or_groups, orders = [], [] 9156 for element in expr: 9157 if isinstance(element, exp.Ordered): 9158 this = element.this 9159 if isinstance(this, exp.Alias): 9160 element.set("this", this.args["alias"]) 9161 orders.append(element) 9162 else: 9163 this = element 9164 aggregates_or_groups.append(this) 9165 9166 if group_by_exists: 9167 query.select(*aggregates_or_groups, copy=False).group_by( 9168 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9169 copy=False, 9170 ) 9171 else: 9172 query.select(*aggregates_or_groups, append=False, copy=False) 9173 9174 if orders: 9175 return query.order_by(*orders, append=False, copy=False) 9176 9177 return query 9178 9179 def 
_parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9180 self._match_text_seq("AGGREGATE") 9181 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9182 9183 if self._match(TokenType.GROUP_BY) or ( 9184 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9185 ): 9186 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9187 9188 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9189 9190 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 9191 first_setop = self.parse_set_operation(this=query) 9192 if not first_setop: 9193 return None 9194 9195 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 9196 expr = self._parse_paren() 9197 return expr.assert_is(exp.Subquery).unnest() if expr else None 9198 9199 first_setop.this.pop() 9200 9201 setops = [ 9202 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9203 *self._parse_csv(_parse_and_unwrap_query), 9204 ] 9205 9206 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9207 with_ = query.args.get("with_") 9208 ctes = with_.pop() if with_ else None 9209 9210 if isinstance(first_setop, exp.Union): 9211 query = query.union(*setops, copy=False, **first_setop.args) 9212 elif isinstance(first_setop, exp.Except): 9213 query = query.except_(*setops, copy=False, **first_setop.args) 9214 else: 9215 query = query.intersect(*setops, copy=False, **first_setop.args) 9216 9217 query.set("with_", ctes) 9218 9219 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9220 9221 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 9222 join = self._parse_join() 9223 if not join: 9224 return None 9225 9226 if isinstance(query, exp.Select): 9227 return query.join(join, copy=False) 9228 9229 return query 9230 9231 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9232 pivots = self._parse_pivots() 9233 if not pivots: 
9234 return query 9235 9236 from_ = query.args.get("from_") 9237 if from_: 9238 from_.this.set("pivots", pivots) 9239 9240 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9241 9242 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9243 self._match_text_seq("EXTEND") 9244 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9245 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9246 9247 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9248 sample = self._parse_table_sample() 9249 9250 with_ = query.args.get("with_") 9251 if with_: 9252 with_.expressions[-1].this.set("sample", sample) 9253 else: 9254 query.set("sample", sample) 9255 9256 return query 9257 9258 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 9259 if isinstance(query, exp.Subquery): 9260 query = exp.select("*").from_(query, copy=False) 9261 9262 if not query.args.get("from_"): 9263 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9264 9265 while self._match(TokenType.PIPE_GT): 9266 start = self._curr 9267 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 9268 if not parser: 9269 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9270 # keywords, making it tricky to disambiguate them without lookahead. The approach 9271 # here is to try and parse a set operation and if that fails, then try to parse a 9272 # join operator. If that fails as well, then the operator is not supported. 
9273 parsed_query = self._parse_pipe_syntax_set_operator(query) 9274 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9275 if not parsed_query: 9276 self._retreat(start) 9277 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 9278 break 9279 query = parsed_query 9280 else: 9281 query = parser(self, query) 9282 9283 return query 9284 9285 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 9286 self._match_texts(("VAR", "VARIABLE")) 9287 9288 vars = self._parse_csv(self._parse_id_var) 9289 if not vars: 9290 return None 9291 9292 self._match(TokenType.ALIAS) 9293 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 9294 default = ( 9295 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 9296 ) and self._parse_bitwise() 9297 9298 return self.expression(exp.DeclareItem, this=vars, kind=kind, default=default) 9299 9300 def _parse_declare(self) -> exp.Declare | exp.Command: 9301 start = self._prev 9302 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 9303 9304 if not expressions or self._curr: 9305 return self._parse_as_command(start) 9306 9307 return self.expression(exp.Declare, expressions=expressions) 9308 9309 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 9310 exp_class = exp.Cast if strict else exp.TryCast 9311 9312 if exp_class == exp.TryCast: 9313 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 9314 9315 return self.expression(exp_class, **kwargs) 9316 9317 def _parse_json_value(self) -> exp.JSONValue: 9318 this = self._parse_bitwise() 9319 self._match(TokenType.COMMA) 9320 path = self._parse_bitwise() 9321 9322 returning = self._match(TokenType.RETURNING) and self._parse_type() 9323 9324 return self.expression( 9325 exp.JSONValue, 9326 this=this, 9327 path=self.dialect.to_json_path(path), 9328 returning=returning, 9329 on_condition=self._parse_on_condition(), 9330 ) 9331 9332 def _parse_group_concat(self) -> 
t.Optional[exp.Expression]: 9333 def concat_exprs( 9334 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 9335 ) -> exp.Expression: 9336 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 9337 concat_exprs = [ 9338 self.expression( 9339 exp.Concat, 9340 expressions=node.expressions, 9341 safe=True, 9342 coalesce=self.dialect.CONCAT_COALESCE, 9343 ) 9344 ] 9345 node.set("expressions", concat_exprs) 9346 return node 9347 if len(exprs) == 1: 9348 return exprs[0] 9349 return self.expression( 9350 exp.Concat, expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE 9351 ) 9352 9353 args = self._parse_csv(self._parse_lambda) 9354 9355 if args: 9356 order = args[-1] if isinstance(args[-1], exp.Order) else None 9357 9358 if order: 9359 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 9360 # remove 'expr' from exp.Order and add it back to args 9361 args[-1] = order.this 9362 order.set("this", concat_exprs(order.this, args)) 9363 9364 this = order or concat_exprs(args[0], args) 9365 else: 9366 this = None 9367 9368 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 9369 9370 return self.expression(exp.GroupConcat, this=this, separator=separator) 9371 9372 def _parse_initcap(self) -> exp.Initcap: 9373 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 9374 9375 # attach dialect's default delimiters 9376 if expr.args.get("expression") is None: 9377 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 9378 9379 return expr 9380 9381 def _parse_operator(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 9382 while True: 9383 if not self._match(TokenType.L_PAREN): 9384 break 9385 9386 op = "" 9387 while self._curr and not self._match(TokenType.R_PAREN): 9388 op += self._curr.text 9389 self._advance() 9390 9391 this = self.expression( 9392 exp.Operator, 9393 comments=self._prev_comments, 9394 this=this, 
9395 operator=op, 9396 expression=self._parse_bitwise(), 9397 ) 9398 9399 if not self._match(TokenType.OPERATOR): 9400 break 9401 9402 return this
Parser consumes a list of tokens produced by the Tokenizer and produces one parsed syntax tree per SQL statement.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1709 def __init__( 1710 self, 1711 error_level: t.Optional[ErrorLevel] = None, 1712 error_message_context: int = 100, 1713 max_errors: int = 3, 1714 dialect: DialectType = None, 1715 ): 1716 from sqlglot.dialects import Dialect 1717 1718 self._core = ParserCore( 1719 error_level=error_level or ErrorLevel.IMMEDIATE, 1720 error_message_context=error_message_context, 1721 max_errors=max_errors, 1722 dialect=Dialect.get_or_raise(dialect), 1723 ) 1724 self._match = self._core._match 1725 self._match_set = self._core._match_set 1726 self._match_pair = self._core._match_pair 1727 self._match_texts = self._core._match_texts 1728 self._match_text_seq = self._core._match_text_seq 1729 self._advance = self._core._advance 1730 self._advance_chunk = self._core._advance_chunk 1731 self._retreat = self._core._retreat 1732 self._add_comments = self._core._add_comments 1733 self._is_connected = self._core._is_connected 1734 self._find_sql = self._core._find_sql 1735 self.raise_error = self._core.raise_error 1736 self.validate_expression = self._core.validate_expression 1737 self._try_parse = self._core._try_parse
1814 def parse(self, raw_tokens: t.List[Token], sql: str) -> t.List[t.Optional[exp.Expression]]: 1815 """ 1816 Parses a list of tokens and returns a list of syntax trees, one tree 1817 per parsed SQL statement. 1818 1819 Args: 1820 raw_tokens: The list of tokens. 1821 sql: The original SQL string. 1822 1823 Returns: 1824 The list of the produced syntax trees. 1825 """ 1826 return self._parse( 1827 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1828 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
1830 def parse_into( 1831 self, 1832 expression_types: exp.IntoType, 1833 raw_tokens: t.List[Token], 1834 sql: t.Optional[str] = None, 1835 ) -> t.List[t.Optional[exp.Expression]]: 1836 """ 1837 Parses a list of tokens into a given Expression type. If a collection of Expression 1838 types is given instead, this method will try to parse the token list into each one 1839 of them, stopping at the first for which the parsing succeeds. 1840 1841 Args: 1842 expression_types: The expression type(s) to try and parse the token list into. 1843 raw_tokens: The list of tokens. 1844 sql: The original SQL string, used to produce helpful debug messages. 1845 1846 Returns: 1847 The target Expression. 1848 """ 1849 errors = [] 1850 for expression_type in ensure_list(expression_types): 1851 parser = self.EXPRESSION_PARSERS.get(expression_type) 1852 if not parser: 1853 raise TypeError(f"No parser registered for {expression_type}") 1854 1855 try: 1856 return self._parse(parser, raw_tokens, sql) 1857 except ParseError as e: 1858 e.errors[0]["into_expression"] = expression_type 1859 errors.append(e) 1860 1861 raise ParseError( 1862 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1863 errors=merge_errors(errors), 1864 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1866 def check_errors(self) -> None: 1867 """Logs or raises any found errors, depending on the chosen error level setting.""" 1868 if self.error_level == ErrorLevel.WARN: 1869 for error in self.errors: 1870 logger.error(str(error)) 1871 elif self.error_level == ErrorLevel.RAISE and self.errors: 1872 raise ParseError( 1873 concat_messages(self.errors, self.max_errors), 1874 errors=merge_errors(self.errors), 1875 )
Logs or raises any found errors, depending on the chosen error level setting.
1877 def expression( 1878 self, 1879 exp_class: t.Type[E], 1880 token: t.Optional[Token] = None, 1881 comments: t.Optional[t.List[str]] = None, 1882 **kwargs, 1883 ) -> E: 1884 if token: 1885 instance = exp_class(this=token.text, **kwargs) 1886 instance.update_positions(token) 1887 else: 1888 instance = exp_class(**kwargs) 1889 instance.add_comments(comments) if comments else self._add_comments(instance) 1890 return self.validate_expression(instance)
5238 def parse_set_operation( 5239 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 5240 ) -> t.Optional[exp.Expression]: 5241 start = self._index 5242 _, side_token, kind_token = self._parse_join_parts() 5243 5244 side = side_token.text if side_token else None 5245 kind = kind_token.text if kind_token else None 5246 5247 if not self._match_set(self.SET_OPERATIONS): 5248 self._retreat(start) 5249 return None 5250 5251 token_type = self._prev.token_type 5252 5253 if token_type == TokenType.UNION: 5254 operation: t.Type[exp.SetOperation] = exp.Union 5255 elif token_type == TokenType.EXCEPT: 5256 operation = exp.Except 5257 else: 5258 operation = exp.Intersect 5259 5260 comments = self._prev.comments 5261 5262 if self._match(TokenType.DISTINCT): 5263 distinct: t.Optional[bool] = True 5264 elif self._match(TokenType.ALL): 5265 distinct = False 5266 else: 5267 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5268 if distinct is None: 5269 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5270 5271 by_name = ( 5272 self._match_text_seq("BY", "NAME") 5273 or self._match_text_seq("STRICT", "CORRESPONDING") 5274 or None 5275 ) 5276 if self._match_text_seq("CORRESPONDING"): 5277 by_name = True 5278 if not side and not kind: 5279 kind = "INNER" 5280 5281 on_column_list = None 5282 if by_name and self._match_texts(("ON", "BY")): 5283 on_column_list = self._parse_wrapped_csv(self._parse_column) 5284 5285 expression = self._parse_select( 5286 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5287 ) 5288 5289 return self.expression( 5290 operation, 5291 comments=comments, 5292 this=this, 5293 distinct=distinct, 5294 by_name=by_name, 5295 expression=expression, 5296 side=side, 5297 kind=kind, 5298 on=on_column_list, 5299 )