sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit

logger = logging.getLogger("sqlglot")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

    if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
        expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTimestamp.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)


class BigQuery(Dialect):
    WEEK_OFFSET = -1
    UNNEST_COLUMN_ONLY = True
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    LOG_BASE_FIRST = False
    HEX_LOWERCASE = True
    FORCE_EARLY_ALIAS_REF_EXPANSION = True
    EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#case_sensitivity
    NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

    # bigquery udfs are case sensitive
    NORMALIZE_FUNCTIONS = False

    # https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#format_elements_date_time
    TIME_MAPPING = {
        "%D": "%m/%d/%y",
        "%E6S": "%S.%f",
    }

    FORMAT_MAPPING = {
        "DD": "%d",
        "MM": "%m",
        "MON": "%b",
        "MONTH": "%B",
        "YYYY": "%Y",
        "YY": "%y",
        "HH": "%I",
        "HH12": "%I",
        "HH24": "%H",
        "MI": "%M",
        "SS": "%S",
        "SSSSS": "%f",
        "TZH": "%z",
    }

    # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement
    # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table
    PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is not NormalizationStrategy.CASE_SENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

        return expression

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.COMMAND,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=exp.Literal.string(str(seq_get(args, 1))),
                this=seq_get(args, 0),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "FORMAT_DATE": lambda args: exp.TimeToStr(
                this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
            ),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar(
                this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$")
            ),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": lambda args: exp.RegexpExtract(
                this=seq_get(args, 0),
                expression=seq_get(args, 1),
                position=seq_get(args, 2),
                occurrence=seq_get(args, 3),
                group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None,
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    text = ""
                    while self._curr and self._curr.token_type != TokenType.DOT:
                        self._advance()
                        text += self._prev.text
                    table_name += text

                this = exp.Identifier(this=table_name, quoted=this.args.get("quoted"))
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True)

            return this

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set("catalog", exp.Identifier(this=parts[0]))
                        table.set("db", exp.Identifier(this=parts[1]))
                else:
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set("db", exp.Identifier(this=parts[0]))
                        table.set("this", exp.Identifier(this=parts[1]))

            if any("." in p.name for p in table.parts):
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

    class Generator(generator.Generator):
        EXPLICIT_SET_OP = True
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArraySize: rename_func("ARRAY_LENGTH"),
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: rename_func("STRING_AGG"),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this),
            exp.StrToTime: lambda self, e: self.func(
                "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone")
            ),
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression),
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToTimestamp: rename_func("DATETIME"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
            exp.DataType.Type.BIGINT: "INT64",
            exp.DataType.Type.BINARY: "BYTES",
            exp.DataType.Type.BOOLEAN: "BOOL",
            exp.DataType.Type.CHAR: "STRING",
            exp.DataType.Type.DECIMAL: "NUMERIC",
            exp.DataType.Type.DOUBLE: "FLOAT64",
            exp.DataType.Type.FLOAT: "FLOAT64",
            exp.DataType.Type.INT: "INT64",
            exp.DataType.Type.NCHAR: "STRING",
            exp.DataType.Type.NVARCHAR: "STRING",
            exp.DataType.Type.SMALLINT: "INT64",
            exp.DataType.Type.TEXT: "STRING",
            exp.DataType.Type.TIMESTAMP: "DATETIME",
            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
            exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
            exp.DataType.Type.TINYINT: "INT64",
            exp.DataType.Type.VARBINARY: "BYTES",
            exp.DataType.Type.ROWVERSION: "BYTES",
            exp.DataType.Type.VARCHAR: "STRING",
            exp.DataType.Type.VARIANT: "ANY TYPE",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
        RESERVED_KEYWORDS = {
            "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
            "between", "by", "case", "cast", "collate", "contains", "create", "cross",
            "cube", "current", "default", "define", "desc", "distinct", "else", "end",
            "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
            "following", "for", "from", "full", "group", "grouping", "groups", "hash",
            "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
            "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
            "natural", "new", "no", "not", "null", "nulls", "of", "on", "or", "order",
            "outer", "over", "partition", "preceding", "proto", "qualify", "range",
            "recursive", "respect", "right", "rollup", "rows", "select", "set", "some",
            "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
            "union", "unnest", "using", "when", "where", "window", "with", "within",
        }

        def mod_sql(self, expression: exp.Mod) -> str:
            this = expression.this
            expr = expression.expression
            return self.func(
                "MOD",
                this.unnest() if isinstance(this, exp.Paren) else this,
                expr.unnest() if isinstance(expr, exp.Paren) else expr,
            )

        def column_parts(self, expression: exp.Column) -> str:
            if expression.meta.get("quoted_column"):
                # If a column reference is of the form `dataset.table`.name, we need
                # to preserve the quoted table path, otherwise the reference breaks
                table_parts = ".".join(p.name for p in expression.parts[:-1])
                table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
                return f"{table_path}.{self.sql(expression, 'this')}"

            return super().column_parts(expression)

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression
            return self.func("FORMAT_DATE", self.format_time(expression), this.this)

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def except_op(self, expression: exp.Except) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery")
            return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def intersect_op(self, expression: exp.Intersect) -> str:
            if not expression.args.get("distinct"):
                self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery")
            return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}"

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
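The listing above is the dialect implementation itself; it is exercised through sqlglot's public API. A minimal, hedged sketch of typical usage (the table and column names here are made up for illustration, and exact output can vary across sqlglot versions):

    import sqlglot

    # BigQuery's TIMESTAMP is timezone-aware, which the Tokenizer maps to TIMESTAMPTZ,
    # so transpiling to DuckDB should yield a TIMESTAMPTZ cast:
    print(sqlglot.transpile("SELECT CAST(x AS TIMESTAMP) FROM t", read="bigquery", write="duckdb"))
    # roughly: ['SELECT CAST(x AS TIMESTAMPTZ) FROM t']

    # Backtick-quoted multi-part names are split into catalog/db/table parts:
    print(sqlglot.transpile("SELECT a FROM `proj.dataset.tbl`", read="bigquery", write="snowflake"))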
class BigQuery(Dialect):
WEEK_OFFSET = -1
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
LOG_BASE_FIRST = False
Whether the base comes first in the LOG function. Possible values: True, False, None (two arguments are not supported by LOG).
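A hedged sketch of what this flag means in practice, assuming Hive as the counterpart dialect (which takes the base first); output may vary by version:

    import sqlglot

    # BigQuery's LOG(value, base) takes the base second (LOG_BASE_FIRST = False),
    # so the arguments should swap when writing for a base-first dialect:
    print(sqlglot.transpile("SELECT LOG(100, 10)", read="bigquery", write="hive"))
    # roughly: ['SELECT LOG(10, 100)']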
FORCE_EARLY_ALIAS_REF_EXPANSION = True
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).

For example:

    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1

In most dialects "my_id" would refer to "data.my_id" across the whole query (this resolution is done in _qualify_columns()), except:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- ClickHouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
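A hedged sketch of this behavior through the optimizer's qualify pass; the data CTE mirrors the example above, and the exact qualified output may vary by version:

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    sql = """
    WITH data AS (SELECT 1 AS id, 2 AS my_id)
    SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id
    """

    # Under BigQuery's rules the alias reference in GROUP BY expands to the
    # underlying column (id) before qualification, instead of data.my_id:
    print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))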
EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY = True
Whether alias reference expansion before qualification should only happen for the GROUP BY clause.
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE
Specifies the strategy according to which identifiers should be normalized.
NORMALIZE_FUNCTIONS = False
Determines how function names are going to be normalized. Possible values:
- "upper" or True: convert names to uppercase.
- "lower": convert names to lowercase.
- False: disables function name normalization.
TIME_MAPPING = {"%D": "%m/%d/%y", "%E6S": "%S.%f"}
Associates this dialect's time formats with their equivalent Python strftime formats.
FORMAT_MAPPING
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy'). If empty, the corresponding trie will be constructed off of TIME_MAPPING.
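These two mappings drive format-string translation. A hedged example, assuming DuckDB as the target (which renders StrToTime as STRPTIME; exact output may vary by version):

    import sqlglot

    # The BigQuery format string carries over because TIME_MAPPING translates it
    # element by element into Python strftime notation:
    print(sqlglot.transpile("SELECT PARSE_TIMESTAMP('%Y-%m-%d', '2024-01-01')", read="bigquery", write="duckdb"))
    # roughly: ["SELECT STRPTIME('2024-01-01', '%Y-%m-%d')"]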
PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"}
Columns that are auto-generated by the engine corresponding to this dialect. For example, such columns may be excluded from SELECT * queries.
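A hedged sketch of how pseudo-columns interact with star expansion, using a hypothetical schema (the dataset, table, and column names are illustrative):

    import sqlglot
    from sqlglot.optimizer.qualify import qualify

    # _PARTITIONTIME is declared in the schema but, being a pseudo-column, it
    # should be left out when the optimizer expands the star:
    schema = {"ds": {"t": {"x": "INT64", "_PARTITIONTIME": "TIMESTAMP"}}}
    expression = sqlglot.parse_one("SELECT * FROM ds.t", read="bigquery")
    print(qualify(expression, schema=schema, dialect="bigquery").sql("bigquery"))
    # roughly: SELECT t.x AS x FROM ds.t AS t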
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive, and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, they may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
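A small sketch of the behavior described above (the per-dialect results are what the docstring predicts; they may vary by version):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

for name in ("bigquery", "postgres", "snowflake"):
    ident = exp.to_identifier("FoO", quoted=False)
    # postgres should lowercase this to foo, snowflake should uppercase it to FOO
    print(name, Dialect.get_or_raise(name).normalize_identifier(ident).name)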
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- COPY_PARAMS_ARE_CSV
- NULL_ORDERING
- TYPED_DIVISION
- SAFE_DIVISION
- CONCAT_COALESCE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- PREFER_CTE_ALIAS_COLUMN
- DATE_PART_MAPPING
- get_or_raise
- format_time
- settings
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
287 class Tokenizer(tokens.Tokenizer): 288 QUOTES = ["'", '"', '"""', "'''"] 289 COMMENTS = ["--", "#", ("/*", "*/")] 290 IDENTIFIERS = ["`"] 291 STRING_ESCAPES = ["\\"] 292 293 HEX_STRINGS = [("0x", ""), ("0X", "")] 294 295 BYTE_STRINGS = [ 296 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B") 297 ] 298 299 RAW_STRINGS = [ 300 (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R") 301 ] 302 303 KEYWORDS = { 304 **tokens.Tokenizer.KEYWORDS, 305 "ANY TYPE": TokenType.VARIANT, 306 "BEGIN": TokenType.COMMAND, 307 "BEGIN TRANSACTION": TokenType.BEGIN, 308 "BYTES": TokenType.BINARY, 309 "CURRENT_DATETIME": TokenType.CURRENT_DATETIME, 310 "DATETIME": TokenType.TIMESTAMP, 311 "DECLARE": TokenType.COMMAND, 312 "ELSEIF": TokenType.COMMAND, 313 "EXCEPTION": TokenType.COMMAND, 314 "FLOAT64": TokenType.DOUBLE, 315 "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT, 316 "MODEL": TokenType.MODEL, 317 "NOT DETERMINISTIC": TokenType.VOLATILE, 318 "RECORD": TokenType.STRUCT, 319 "TIMESTAMP": TokenType.TIMESTAMPTZ, 320 } 321 KEYWORDS.pop("DIV") 322 KEYWORDS.pop("VALUES") 323 KEYWORDS.pop("/*+")
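A sketch exercising a few of these settings, namely hash comments, backquoted identifiers, and raw strings:

from sqlglot.dialects.bigquery import BigQuery

sql = "SELECT r'\\d+' AS pattern FROM `dataset.tbl`  # trailing hash comment"
for token in BigQuery().tokenize(sql):
    print(token.token_type, repr(token.text))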
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
325 class Parser(parser.Parser): 326 PREFIXED_PIVOT_COLUMNS = True 327 LOG_DEFAULTS_TO_LN = True 328 SUPPORTS_IMPLICIT_UNNEST = True 329 330 FUNCTIONS = { 331 **parser.Parser.FUNCTIONS, 332 "DATE": _build_date, 333 "DATE_ADD": build_date_delta_with_interval(exp.DateAdd), 334 "DATE_SUB": build_date_delta_with_interval(exp.DateSub), 335 "DATE_TRUNC": lambda args: exp.DateTrunc( 336 unit=exp.Literal.string(str(seq_get(args, 1))), 337 this=seq_get(args, 0), 338 ), 339 "DATETIME": _build_datetime, 340 "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd), 341 "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub), 342 "DIV": binary_from_function(exp.IntDiv), 343 "FORMAT_DATE": lambda args: exp.TimeToStr( 344 this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0) 345 ), 346 "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list, 347 "JSON_EXTRACT_SCALAR": lambda args: exp.JSONExtractScalar( 348 this=seq_get(args, 0), expression=seq_get(args, 1) or exp.Literal.string("$") 349 ), 350 "MD5": exp.MD5Digest.from_arg_list, 351 "TO_HEX": _build_to_hex, 352 "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")( 353 [seq_get(args, 1), seq_get(args, 0)] 354 ), 355 "PARSE_TIMESTAMP": _build_parse_timestamp, 356 "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list, 357 "REGEXP_EXTRACT": lambda args: exp.RegexpExtract( 358 this=seq_get(args, 0), 359 expression=seq_get(args, 1), 360 position=seq_get(args, 2), 361 occurrence=seq_get(args, 3), 362 group=exp.Literal.number(1) if re.compile(args[1].name).groups == 1 else None, 363 ), 364 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 365 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 366 "SPLIT": lambda args: exp.Split( 367 # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split 368 this=seq_get(args, 0), 369 expression=seq_get(args, 1) or exp.Literal.string(","), 370 ), 371 "TIME": _build_time, 372 "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd), 373 "TIME_SUB": build_date_delta_with_interval(exp.TimeSub), 374 "TIMESTAMP": _build_timestamp, 375 "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd), 376 "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub), 377 "TIMESTAMP_MICROS": lambda args: exp.UnixToTime( 378 this=seq_get(args, 0), scale=exp.UnixToTime.MICROS 379 ), 380 "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime( 381 this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS 382 ), 383 "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)), 384 "TO_JSON_STRING": exp.JSONFormat.from_arg_list, 385 } 386 387 FUNCTION_PARSERS = { 388 **parser.Parser.FUNCTION_PARSERS, 389 "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]), 390 } 391 FUNCTION_PARSERS.pop("TRIM") 392 393 NO_PAREN_FUNCTIONS = { 394 **parser.Parser.NO_PAREN_FUNCTIONS, 395 TokenType.CURRENT_DATETIME: exp.CurrentDatetime, 396 } 397 398 NESTED_TYPE_TOKENS = { 399 *parser.Parser.NESTED_TYPE_TOKENS, 400 TokenType.TABLE, 401 } 402 403 PROPERTY_PARSERS = { 404 **parser.Parser.PROPERTY_PARSERS, 405 "NOT DETERMINISTIC": lambda self: self.expression( 406 exp.StabilityProperty, this=exp.Literal.string("VOLATILE") 407 ), 408 "OPTIONS": lambda self: self._parse_with_property(), 409 } 410 411 CONSTRAINT_PARSERS = { 412 **parser.Parser.CONSTRAINT_PARSERS, 413 "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()), 414 } 415 416 RANGE_PARSERS = 
parser.Parser.RANGE_PARSERS.copy() 417 RANGE_PARSERS.pop(TokenType.OVERLAPS) 418 419 NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN} 420 421 STATEMENT_PARSERS = { 422 **parser.Parser.STATEMENT_PARSERS, 423 TokenType.ELSE: lambda self: self._parse_as_command(self._prev), 424 TokenType.END: lambda self: self._parse_as_command(self._prev), 425 TokenType.FOR: lambda self: self._parse_for_in(), 426 } 427 428 BRACKET_OFFSETS = { 429 "OFFSET": (0, False), 430 "ORDINAL": (1, False), 431 "SAFE_OFFSET": (0, True), 432 "SAFE_ORDINAL": (1, True), 433 } 434 435 def _parse_for_in(self) -> exp.ForIn: 436 this = self._parse_range() 437 self._match_text_seq("DO") 438 return self.expression(exp.ForIn, this=this, expression=self._parse_statement()) 439 440 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 441 this = super()._parse_table_part(schema=schema) or self._parse_number() 442 443 # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names 444 if isinstance(this, exp.Identifier): 445 table_name = this.name 446 while self._match(TokenType.DASH, advance=False) and self._next: 447 text = "" 448 while self._curr and self._curr.token_type != TokenType.DOT: 449 self._advance() 450 text += self._prev.text 451 table_name += text 452 453 this = exp.Identifier(this=table_name, quoted=this.args.get("quoted")) 454 elif isinstance(this, exp.Literal): 455 table_name = this.name 456 457 if self._is_connected() and self._parse_var(any_token=True): 458 table_name += self._prev.text 459 460 this = exp.Identifier(this=table_name, quoted=True) 461 462 return this 463 464 def _parse_table_parts( 465 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 466 ) -> exp.Table: 467 table = super()._parse_table_parts( 468 schema=schema, is_db_reference=is_db_reference, wildcard=True 469 ) 470 471 # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here 472 if not table.catalog: 473 if table.db: 474 parts = table.db.split(".") 475 if len(parts) == 2 and not table.args["db"].quoted: 476 table.set("catalog", exp.Identifier(this=parts[0])) 477 table.set("db", exp.Identifier(this=parts[1])) 478 else: 479 parts = table.name.split(".") 480 if len(parts) == 2 and not table.this.quoted: 481 table.set("db", exp.Identifier(this=parts[0])) 482 table.set("this", exp.Identifier(this=parts[1])) 483 484 if any("." in p.name for p in table.parts): 485 catalog, db, this, *rest = ( 486 exp.to_identifier(p, quoted=True) 487 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 488 ) 489 490 if rest and this: 491 this = exp.Dot.build([this, *rest]) # type: ignore 492 493 table = exp.Table( 494 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 495 ) 496 table.meta["quoted_table"] = True 497 498 return table 499 500 def _parse_column(self) -> t.Optional[exp.Expression]: 501 column = super()._parse_column() 502 if isinstance(column, exp.Column): 503 parts = column.parts 504 if any("." in p.name for p in parts): 505 catalog, db, table, this, *rest = ( 506 exp.to_identifier(p, quoted=True) 507 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 508 ) 509 510 if rest and this: 511 this = exp.Dot.build([this, *rest]) # type: ignore 512 513 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 514 column.meta["quoted_column"] = True 515 516 return column 517 518 @t.overload 519 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
520 521 @t.overload 522 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 523 524 def _parse_json_object(self, agg=False): 525 json_object = super()._parse_json_object() 526 array_kv_pair = seq_get(json_object.expressions, 0) 527 528 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 529 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 530 if ( 531 array_kv_pair 532 and isinstance(array_kv_pair.this, exp.Array) 533 and isinstance(array_kv_pair.expression, exp.Array) 534 ): 535 keys = array_kv_pair.this.expressions 536 values = array_kv_pair.expression.expressions 537 538 json_object.set( 539 "expressions", 540 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 541 ) 542 543 return json_object 544 545 def _parse_bracket( 546 self, this: t.Optional[exp.Expression] = None 547 ) -> t.Optional[exp.Expression]: 548 bracket = super()._parse_bracket(this) 549 550 if this is bracket: 551 return bracket 552 553 if isinstance(bracket, exp.Bracket): 554 for expression in bracket.expressions: 555 name = expression.name.upper() 556 557 if name not in self.BRACKET_OFFSETS: 558 break 559 560 offset, safe = self.BRACKET_OFFSETS[name] 561 bracket.set("offset", offset) 562 bracket.set("safe", safe) 563 expression.replace(expression.expressions[0]) 564 565 return bracket
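Two of the BigQuery-specific parsing behaviors above, sketched (round-tripped SQL may differ slightly across versions):

import sqlglot

# Dashed table names (see _parse_table_part) are glued back into a single identifier.
print(sqlglot.parse_one("SELECT * FROM my-project.dataset.tbl", read="bigquery").sql("bigquery"))

# OFFSET/ORDINAL/SAFE_* subscripts (see BRACKET_OFFSETS) are folded into
# exp.Bracket's offset/safe args by _parse_bracket.
bracket = sqlglot.parse_one("SELECT arr[SAFE_OFFSET(0)] FROM t", read="bigquery").find(sqlglot.exp.Bracket)
print(bracket.args.get("offset"), bracket.args.get("safe"))  # expected: 0 True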
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
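For instance, parse errors can be suppressed by lowering the error level (a sketch; the exact error message is version-dependent):

import sqlglot
from sqlglot import ErrorLevel

try:
    sqlglot.parse_one("SELECT 1 +", read="bigquery")
except sqlglot.ParseError as err:
    print(err)

# With ErrorLevel.IGNORE the parser returns whatever tree it managed to build.
tree = sqlglot.parse_one("SELECT 1 +", read="bigquery", error_level=ErrorLevel.IGNORE)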
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- INTERVAL_VARS
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_JSON_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
567 class Generator(generator.Generator): 568 EXPLICIT_SET_OP = True 569 INTERVAL_ALLOWS_PLURAL_FORM = False 570 JOIN_HINTS = False 571 QUERY_HINTS = False 572 TABLE_HINTS = False 573 LIMIT_FETCH = "LIMIT" 574 RENAME_TABLE_WITH_DB = False 575 NVL2_SUPPORTED = False 576 UNNEST_WITH_ORDINALITY = False 577 COLLATE_IS_FUNC = True 578 LIMIT_ONLY_LITERALS = True 579 SUPPORTS_TABLE_ALIAS_COLUMNS = False 580 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 581 JSON_KEY_VALUE_PAIR_SEP = "," 582 NULL_ORDERING_SUPPORTED = False 583 IGNORE_NULLS_IN_FUNC = True 584 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 585 CAN_IMPLEMENT_ARRAY_ANY = True 586 SUPPORTS_TO_NUMBER = False 587 NAMED_PLACEHOLDER_TOKEN = "@" 588 HEX_FUNC = "TO_HEX" 589 WITH_PROPERTIES_PREFIX = "OPTIONS" 590 591 TRANSFORMS = { 592 **generator.Generator.TRANSFORMS, 593 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 594 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 595 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 596 exp.Array: inline_array_unless_query, 597 exp.ArrayContains: _array_contains_sql, 598 exp.ArrayFilter: filter_array_using_unnest, 599 exp.ArraySize: rename_func("ARRAY_LENGTH"), 600 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 601 exp.CollateProperty: lambda self, e: ( 602 f"DEFAULT COLLATE {self.sql(e, 'this')}" 603 if e.args.get("default") 604 else f"COLLATE {self.sql(e, 'this')}" 605 ), 606 exp.Commit: lambda *_: "COMMIT TRANSACTION", 607 exp.CountIf: rename_func("COUNTIF"), 608 exp.Create: _create_sql, 609 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 610 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 611 exp.DateDiff: lambda self, e: self.func( 612 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 613 ), 614 exp.DateFromParts: rename_func("DATE"), 615 exp.DateStrToDate: datestrtodate_sql, 616 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 617 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 618 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 619 exp.DateTrunc: lambda self, e: self.func("DATE_TRUNC", e.this, e.text("unit")), 620 exp.FromTimeZone: lambda self, e: self.func( 621 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 622 ), 623 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 624 exp.GroupConcat: rename_func("STRING_AGG"), 625 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 626 exp.If: if_sql(false_value="NULL"), 627 exp.ILike: no_ilike_sql, 628 exp.IntDiv: rename_func("DIV"), 629 exp.JSONFormat: rename_func("TO_JSON_STRING"), 630 exp.Max: max_or_greatest, 631 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 632 exp.MD5Digest: rename_func("MD5"), 633 exp.Min: min_or_least, 634 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 635 exp.RegexpExtract: lambda self, e: self.func( 636 "REGEXP_EXTRACT", 637 e.this, 638 e.expression, 639 e.args.get("position"), 640 e.args.get("occurrence"), 641 ), 642 exp.RegexpReplace: regexp_replace_sql, 643 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 644 exp.ReturnsProperty: _returnsproperty_sql, 645 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 646 exp.Select: transforms.preprocess( 647 [ 648 transforms.explode_to_unnest(), 649 transforms.unqualify_unnest, 650 transforms.eliminate_distinct_on, 651 _alias_ordered_group, 652 transforms.eliminate_semi_and_anti_joins, 653 ] 654 ), 655 exp.SHA: rename_func("SHA1"), 656 exp.SHA2: sha256_sql, 657 exp.StabilityProperty: lambda self, e: ( 
658 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 659 ), 660 exp.StrToDate: lambda self, e: self.func("PARSE_DATE", self.format_time(e), e.this), 661 exp.StrToTime: lambda self, e: self.func( 662 "PARSE_TIMESTAMP", self.format_time(e), e.this, e.args.get("zone") 663 ), 664 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 665 exp.TimeFromParts: rename_func("TIME"), 666 exp.TimestampFromParts: rename_func("DATETIME"), 667 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 668 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 669 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 670 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 671 exp.TimeStrToTime: timestrtotime_sql, 672 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 673 exp.Trim: lambda self, e: self.func("TRIM", e.this, e.expression), 674 exp.TsOrDsAdd: _ts_or_ds_add_sql, 675 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 676 exp.TsOrDsToTime: rename_func("TIME"), 677 exp.TsOrDsToTimestamp: rename_func("DATETIME"), 678 exp.Unhex: rename_func("FROM_HEX"), 679 exp.UnixDate: rename_func("UNIX_DATE"), 680 exp.UnixToTime: _unix_to_time_sql, 681 exp.Values: _derived_table_values_to_unnest, 682 exp.VariancePop: rename_func("VAR_POP"), 683 } 684 685 SUPPORTED_JSON_PATH_PARTS = { 686 exp.JSONPathKey, 687 exp.JSONPathRoot, 688 exp.JSONPathSubscript, 689 } 690 691 TYPE_MAPPING = { 692 **generator.Generator.TYPE_MAPPING, 693 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 694 exp.DataType.Type.BIGINT: "INT64", 695 exp.DataType.Type.BINARY: "BYTES", 696 exp.DataType.Type.BOOLEAN: "BOOL", 697 exp.DataType.Type.CHAR: "STRING", 698 exp.DataType.Type.DECIMAL: "NUMERIC", 699 exp.DataType.Type.DOUBLE: "FLOAT64", 700 exp.DataType.Type.FLOAT: "FLOAT64", 701 exp.DataType.Type.INT: "INT64", 702 exp.DataType.Type.NCHAR: "STRING", 703 exp.DataType.Type.NVARCHAR: "STRING", 704 exp.DataType.Type.SMALLINT: "INT64", 705 exp.DataType.Type.TEXT: "STRING", 706 exp.DataType.Type.TIMESTAMP: "DATETIME", 707 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 708 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 709 exp.DataType.Type.TINYINT: "INT64", 710 exp.DataType.Type.VARBINARY: "BYTES", 711 exp.DataType.Type.ROWVERSION: "BYTES", 712 exp.DataType.Type.VARCHAR: "STRING", 713 exp.DataType.Type.VARIANT: "ANY TYPE", 714 } 715 716 PROPERTIES_LOCATION = { 717 **generator.Generator.PROPERTIES_LOCATION, 718 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 719 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 720 } 721 722 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 723 RESERVED_KEYWORDS = { 724 "all", 725 "and", 726 "any", 727 "array", 728 "as", 729 "asc", 730 "assert_rows_modified", 731 "at", 732 "between", 733 "by", 734 "case", 735 "cast", 736 "collate", 737 "contains", 738 "create", 739 "cross", 740 "cube", 741 "current", 742 "default", 743 "define", 744 "desc", 745 "distinct", 746 "else", 747 "end", 748 "enum", 749 "escape", 750 "except", 751 "exclude", 752 "exists", 753 "extract", 754 "false", 755 "fetch", 756 "following", 757 "for", 758 "from", 759 "full", 760 "group", 761 "grouping", 762 "groups", 763 "hash", 764 "having", 765 "if", 766 "ignore", 767 "in", 768 "inner", 769 "intersect", 770 "interval", 771 "into", 772 "is", 773 "join", 774 "lateral", 775 "left", 776 "like", 777 "limit", 778 "lookup", 779 "merge", 780 "natural", 781 "new", 782 "no", 783 "not", 784 "null", 785 "nulls", 786 "of", 787 "on", 788 "or", 789 "order", 790 "outer", 791 "over", 792 "partition", 793 
"preceding", 794 "proto", 795 "qualify", 796 "range", 797 "recursive", 798 "respect", 799 "right", 800 "rollup", 801 "rows", 802 "select", 803 "set", 804 "some", 805 "struct", 806 "tablesample", 807 "then", 808 "to", 809 "treat", 810 "true", 811 "unbounded", 812 "union", 813 "unnest", 814 "using", 815 "when", 816 "where", 817 "window", 818 "with", 819 "within", 820 } 821 822 def mod_sql(self, expression: exp.Mod) -> str: 823 this = expression.this 824 expr = expression.expression 825 return self.func( 826 "MOD", 827 this.unnest() if isinstance(this, exp.Paren) else this, 828 expr.unnest() if isinstance(expr, exp.Paren) else expr, 829 ) 830 831 def column_parts(self, expression: exp.Column) -> str: 832 if expression.meta.get("quoted_column"): 833 # If a column reference is of the form `dataset.table`.name, we need 834 # to preserve the quoted table path, otherwise the reference breaks 835 table_parts = ".".join(p.name for p in expression.parts[:-1]) 836 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 837 return f"{table_path}.{self.sql(expression, 'this')}" 838 839 return super().column_parts(expression) 840 841 def table_parts(self, expression: exp.Table) -> str: 842 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 843 # we need to make sure the correct quoting is used in each case. 844 # 845 # For example, if there is a CTE x that clashes with a schema name, then the former will 846 # return the table y in that schema, whereas the latter will return the CTE's y column: 847 # 848 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 849 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 850 if expression.meta.get("quoted_table"): 851 table_parts = ".".join(p.name for p in expression.parts) 852 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 853 854 return super().table_parts(expression) 855 856 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 857 this = expression.this if isinstance(expression.this, exp.TsOrDsToDate) else expression 858 return self.func("FORMAT_DATE", self.format_time(expression), this.this) 859 860 def eq_sql(self, expression: exp.EQ) -> str: 861 # Operands of = cannot be NULL in BigQuery 862 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 863 if not isinstance(expression.parent, exp.Update): 864 return "NULL" 865 866 return self.binary(expression, "=") 867 868 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 869 parent = expression.parent 870 871 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 872 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 
873 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 874 return self.func( 875 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 876 ) 877 878 return super().attimezone_sql(expression) 879 880 def trycast_sql(self, expression: exp.TryCast) -> str: 881 return self.cast_sql(expression, safe_prefix="SAFE_") 882 883 def bracket_sql(self, expression: exp.Bracket) -> str: 884 this = expression.this 885 expressions = expression.expressions 886 887 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 888 arg = expressions[0] 889 if arg.type is None: 890 from sqlglot.optimizer.annotate_types import annotate_types 891 892 arg = annotate_types(arg) 893 894 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 895 # BQ doesn't support bracket syntax with string values for structs 896 return f"{self.sql(this)}.{arg.name}" 897 898 expressions_sql = self.expressions(expression, flat=True) 899 offset = expression.args.get("offset") 900 901 if offset == 0: 902 expressions_sql = f"OFFSET({expressions_sql})" 903 elif offset == 1: 904 expressions_sql = f"ORDINAL({expressions_sql})" 905 elif offset is not None: 906 self.unsupported(f"Unsupported array offset: {offset}") 907 908 if expression.args.get("safe"): 909 expressions_sql = f"SAFE_{expressions_sql}" 910 911 return f"{self.sql(this)}[{expressions_sql}]" 912 913 def in_unnest_op(self, expression: exp.Unnest) -> str: 914 return self.sql(expression) 915 916 def except_op(self, expression: exp.Except) -> str: 917 if not expression.args.get("distinct"): 918 self.unsupported("EXCEPT without DISTINCT is not supported in BigQuery") 919 return f"EXCEPT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 920 921 def intersect_op(self, expression: exp.Intersect) -> str: 922 if not expression.args.get("distinct"): 923 self.unsupported("INTERSECT without DISTINCT is not supported in BigQuery") 924 return f"INTERSECT{' DISTINCT' if expression.args.get('distinct') else ' ALL'}" 925 926 def version_sql(self, expression: exp.Version) -> str: 927 if expression.name == "TIMESTAMP": 928 expression.set("this", "SYSTEM_TIME") 929 return super().version_sql(expression)
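A couple of the TRANSFORMS and TYPE_MAPPING entries above, exercised through transpile (a sketch; outputs are shown as expected, not verified against every version):

import sqlglot

print(sqlglot.transpile("SELECT COUNT_IF(x > 0) FROM t", write="bigquery")[0])
# expected: SELECT COUNTIF(x > 0) FROM t

print(sqlglot.transpile("SELECT CAST(x AS VARCHAR(10)) FROM t", write="bigquery")[0])
# expected: SELECT CAST(x AS STRING) FROM t  (precision removed, VARCHAR mapped to STRING)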
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are:
  - False (default): Never quote, except in cases where it's mandatory by the dialect.
  - True or 'always': Always quote.
  - 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are:
  - "upper" or True (default): Convert names to uppercase.
  - "lower": Convert names to lowercase.
  - False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
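For example (a sketch of the pretty and identify options; exact formatting may differ by version):

import sqlglot

e = sqlglot.parse_one("select a, b from c where a > 1", read="bigquery")
print(e.sql(dialect="bigquery", pretty=True, identify=True))
# expected, roughly:
# SELECT
#   `a`,
#   `b`
# FROM `c`
# WHERE
#   `a` > 1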
831 def column_parts(self, expression: exp.Column) -> str: 832 if expression.meta.get("quoted_column"): 833 # If a column reference is of the form `dataset.table`.name, we need 834 # to preserve the quoted table path, otherwise the reference breaks 835 table_parts = ".".join(p.name for p in expression.parts[:-1]) 836 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 837 return f"{table_path}.{self.sql(expression, 'this')}" 838 839 return super().column_parts(expression)
841 def table_parts(self, expression: exp.Table) -> str: 842 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 843 # we need to make sure the correct quoting is used in each case. 844 # 845 # For example, if there is a CTE x that clashes with a schema name, then the former will 846 # return the table y in that schema, whereas the latter will return the CTE's y column: 847 # 848 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 849 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 850 if expression.meta.get("quoted_table"): 851 table_parts = ".".join(p.name for p in expression.parts) 852 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 853 854 return super().table_parts(expression)
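A sketch of the quoting distinction described in the comment above:

import sqlglot

# The fully quoted path round-trips as one quoted identifier...
print(sqlglot.transpile("SELECT * FROM `proj.dataset.tbl`", read="bigquery", write="bigquery")[0])
# ...while individually quoted parts remain separate.
print(sqlglot.transpile("SELECT * FROM `proj`.`dataset`.`tbl`", read="bigquery", write="bigquery")[0])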
868 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 869 parent = expression.parent 870 871 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 872 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 873 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 874 return self.func( 875 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 876 ) 877 878 return super().attimezone_sql(expression)
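For instance, transpiling a Postgres AT TIME ZONE expression (a sketch; expected output per the conversion above):

import sqlglot

print(sqlglot.transpile("SELECT ts AT TIME ZONE 'America/New_York'", read="postgres", write="bigquery")[0])
# expected: SELECT TIMESTAMP(DATETIME(ts, 'America/New_York'))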
883 def bracket_sql(self, expression: exp.Bracket) -> str: 884 this = expression.this 885 expressions = expression.expressions 886 887 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 888 arg = expressions[0] 889 if arg.type is None: 890 from sqlglot.optimizer.annotate_types import annotate_types 891 892 arg = annotate_types(arg) 893 894 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 895 # BQ doesn't support bracket syntax with string values for structs 896 return f"{self.sql(this)}.{arg.name}" 897 898 expressions_sql = self.expressions(expression, flat=True) 899 offset = expression.args.get("offset") 900 901 if offset == 0: 902 expressions_sql = f"OFFSET({expressions_sql})" 903 elif offset == 1: 904 expressions_sql = f"ORDINAL({expressions_sql})" 905 elif offset is not None: 906 self.unsupported(f"Unsupported array offset: {offset}") 907 908 if expression.args.get("safe"): 909 expressions_sql = f"SAFE_{expressions_sql}" 910 911 return f"{self.sql(this)}[{expressions_sql}]"
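The offset/safe args set by the parser round-trip through this method, e.g. (a sketch):

import sqlglot

print(sqlglot.transpile("SELECT arr[SAFE_ORDINAL(1)] FROM t", read="bigquery", write="bigquery")[0])
# expected: SELECT arr[SAFE_ORDINAL(1)] FROM t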
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- intersect_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- set_operations
- union_sql
- union_op
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- currenttimestamp_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- renametable_sql
- renamecolumn_sql
- alterset_sql
- altertable_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- operator_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- generateseries_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql