mz_sql_parser/ast/defs/expr.rs
1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13// http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use std::{fmt, mem};
22
23use itertools::Itertools;
24use mz_ore::soft_assert_eq_or_log;
25use mz_sql_lexer::keywords::*;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, Ident, OrderByExpr, Query, UnresolvedItemName, Value};
29
30/// An SQL expression of any type.
31///
32/// The parser does not distinguish between expressions of different types
33/// (e.g. boolean vs string), so the caller must handle expressions of
34/// inappropriate type, like `WHERE 1` or `SELECT 1=1`, as necessary.
35#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
36pub enum Expr<T: AstInfo> {
37 /// Identifier e.g. table name or column name. The parser always
38 /// constructs this with a non-empty `Vec`.
39 Identifier(Vec<Ident>),
40 /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`.
41 QualifiedWildcard(Vec<Ident>),
42 /// A field access, like `(expr).foo`.
43 FieldAccess {
44 expr: Box<Expr<T>>,
45 field: Ident,
46 },
47 /// A wildcard field access, like `(expr).*`.
48 ///
49 /// Note that this is different from `QualifiedWildcard` in that the
50 /// wildcard access occurs on an arbitrary expression, rather than a
51 /// qualified name. The distinction is important for PostgreSQL
52 /// compatibility.
53 WildcardAccess(Box<Expr<T>>),
54 /// A positional parameter, e.g., `$1` or `$42`
55 Parameter(usize),
56 /// Boolean negation
57 Not {
58 expr: Box<Expr<T>>,
59 },
60 /// Boolean and
61 And {
62 left: Box<Expr<T>>,
63 right: Box<Expr<T>>,
64 },
65 /// Boolean or
66 Or {
67 left: Box<Expr<T>>,
68 right: Box<Expr<T>>,
69 },
70 /// `IS {NULL, TRUE, FALSE, UNKNOWN}` expression
71 IsExpr {
72 expr: Box<Expr<T>>,
73 construct: IsExprConstruct<T>,
74 negated: bool,
75 },
76 /// `[ NOT ] IN (val1, val2, ...)`
77 InList {
78 expr: Box<Expr<T>>,
79 list: Vec<Expr<T>>,
80 negated: bool,
81 },
82 /// `[ NOT ] IN (SELECT ...)`
83 InSubquery {
84 expr: Box<Expr<T>>,
85 subquery: Box<Query<T>>,
86 negated: bool,
87 },
88 /// `<expr> [ NOT ] {LIKE, ILIKE} <pattern> [ ESCAPE <escape> ]`
89 Like {
90 expr: Box<Expr<T>>,
91 pattern: Box<Expr<T>>,
92 escape: Option<Box<Expr<T>>>,
93 case_insensitive: bool,
94 negated: bool,
95 },
96 /// `<expr> [ NOT ] BETWEEN <low> AND <high>`
97 Between {
98 expr: Box<Expr<T>>,
99 negated: bool,
100 low: Box<Expr<T>>,
101 high: Box<Expr<T>>,
102 },
103 /// Unary or binary operator
104 Op {
105 op: Op,
106 expr1: Box<Expr<T>>,
107 expr2: Option<Box<Expr<T>>>,
108 },
109 /// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
110 Cast {
111 expr: Box<Expr<T>>,
112 data_type: T::DataType,
113 },
114 /// `expr COLLATE collation`
115 Collate {
116 expr: Box<Expr<T>>,
117 collation: UnresolvedItemName,
118 },
119 /// `COALESCE(<expr>, ...)` or `GREATEST(<expr>, ...)` or `LEAST(<expr>`, ...)
120 ///
121 /// While COALESCE/GREATEST/LEAST have the same syntax as a function call,
122 /// their semantics are extremely unusual, and are better captured with a
123 /// dedicated AST node.
124 HomogenizingFunction {
125 function: HomogenizingFunction,
126 exprs: Vec<Expr<T>>,
127 },
128 /// NULLIF(expr, expr)
129 ///
130 /// While NULLIF has the same syntax as a function call, it is not evaluated
131 /// as a function within Postgres.
132 NullIf {
133 l_expr: Box<Expr<T>>,
134 r_expr: Box<Expr<T>>,
135 },
136 /// Nested expression e.g. `(foo > bar)` or `(1)`
137 Nested(Box<Expr<T>>),
138 /// A row constructor like `ROW(<expr>...)` or `(<expr>, <expr>...)`.
139 Row {
140 exprs: Vec<Expr<T>>,
141 },
142 /// A literal value, such as string, number, date or NULL
143 Value(Value),
144 /// Scalar function call e.g. `LEFT(foo, 5)`
145 Function(Function<T>),
146 /// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
147 ///
148 /// Note we only recognize a complete single expression as `<condition>`,
149 /// not `< 0` nor `1, 2, 3` as allowed in a `<simple when clause>` per
150 /// <https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#simple-when-clause>
151 Case {
152 operand: Option<Box<Expr<T>>>,
153 conditions: Vec<Expr<T>>,
154 results: Vec<Expr<T>>,
155 else_result: Option<Box<Expr<T>>>,
156 },
157 /// An exists expression `EXISTS(SELECT ...)`, used in expressions like
158 /// `WHERE EXISTS (SELECT ...)`.
159 Exists(Box<Query<T>>),
160 /// A parenthesized subquery `(SELECT ...)`, used in expression like
161 /// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
162 Subquery(Box<Query<T>>),
163 /// `<expr> <op> ANY/SOME (<query>)`
164 AnySubquery {
165 left: Box<Expr<T>>,
166 op: Op,
167 right: Box<Query<T>>,
168 },
169 /// `<expr> <op> ANY (<array_expr>)`
170 AnyExpr {
171 left: Box<Expr<T>>,
172 op: Op,
173 right: Box<Expr<T>>,
174 },
175 /// `<expr> <op> ALL (<query>)`
176 AllSubquery {
177 left: Box<Expr<T>>,
178 op: Op,
179 right: Box<Query<T>>,
180 },
181 /// `<expr> <op> ALL (<array_expr>)`
182 AllExpr {
183 left: Box<Expr<T>>,
184 op: Op,
185 right: Box<Expr<T>>,
186 },
187 /// `ARRAY[<expr>*]`
188 Array(Vec<Expr<T>>),
189 ArraySubquery(Box<Query<T>>),
190 /// `LIST[<expr>*]`
191 List(Vec<Expr<T>>),
192 ListSubquery(Box<Query<T>>),
193 /// `MAP[<expr>*]`
194 Map(Vec<MapEntry<T>>),
195 MapSubquery(Box<Query<T>>),
196 /// `<expr>([<expr>(:<expr>)?])+`
197 Subscript {
198 expr: Box<Expr<T>>,
199 positions: Vec<SubscriptPosition<T>>,
200 },
201}
202
203impl<T: AstInfo> AstDisplay for Expr<T> {
204 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
205 match self {
206 Expr::Identifier(s) => f.write_node(&display::separated(s, ".")),
207 Expr::QualifiedWildcard(q) => {
208 f.write_node(&display::separated(q, "."));
209 f.write_str(".*");
210 }
211 Expr::FieldAccess { expr, field } => {
212 write_dot_receiver(f, expr);
213 f.write_str(".");
214 f.write_node(field);
215 }
216 Expr::WildcardAccess(expr) => {
217 write_dot_receiver(f, expr);
218 f.write_str(".*");
219 }
220 Expr::Parameter(n) => f.write_str(&format!("${}", n)),
221 Expr::Not { expr } => {
222 f.write_str("NOT ");
223 // `NOT` binds tighter than `AND`/`OR`, so an operand exposing a
224 // looser operator on its left spine (`NOT (a OR b)`) must keep its
225 // parens once `Nested` is stripped. We use `left_edge`, since the
226 // `NOT` sits to the operand's left.
227 write_binary_operand(f, expr, left_edge(expr) < prec::NOT);
228 }
229 Expr::And { left, right } => {
230 write_binary_operand(f, left, right_edge(left) < prec::AND);
231 f.write_str(" AND ");
232 write_binary_operand(f, right, left_edge(right) <= prec::AND);
233 }
234 Expr::Or { left, right } => {
235 write_binary_operand(f, left, right_edge(left) < prec::OR);
236 f.write_str(" OR ");
237 write_binary_operand(f, right, left_edge(right) <= prec::OR);
238 }
239 Expr::IsExpr {
240 expr,
241 negated,
242 construct,
243 } => {
244 write_binary_operand(f, expr, right_edge(expr) < prec::IS);
245 f.write_str(" IS ");
246 if *negated {
247 f.write_str("NOT ");
248 }
249 // `IS DISTINCT FROM <rhs>` parses the RHS at the `IS` precedence
250 // (see `Parser::parse_is`), so a RHS whose left spine binds at or
251 // below `IS` re-associates out of the `IS` unless parenthesized
252 // (`a IS DISTINCT FROM b OR c` is `(a IS DISTINCT FROM b) OR c`).
253 // The other constructs (`NULL`/`TRUE`/…) are bare keywords.
254 if let IsExprConstruct::DistinctFrom(rhs) = construct {
255 f.write_str("DISTINCT FROM ");
256 write_binary_operand(f, rhs, left_edge(rhs) <= prec::IS);
257 } else {
258 f.write_node(construct);
259 }
260 }
261 Expr::InList {
262 expr,
263 list,
264 negated,
265 } => {
266 write_binary_operand(f, expr, right_edge(expr) < prec::LIKE);
267 f.write_str(" ");
268 if *negated {
269 f.write_str("NOT ");
270 }
271 f.write_str("IN (");
272 f.write_node(&display::comma_separated(list));
273 f.write_str(")");
274 }
275 Expr::InSubquery {
276 expr,
277 subquery,
278 negated,
279 } => {
280 write_binary_operand(f, expr, right_edge(expr) < prec::LIKE);
281 f.write_str(" ");
282 if *negated {
283 f.write_str("NOT ");
284 }
285 f.write_str("IN (");
286 f.write_node(&subquery);
287 f.write_str(")");
288 }
289 Expr::Like {
290 expr,
291 pattern,
292 escape,
293 case_insensitive,
294 negated,
295 } => {
296 write_binary_operand(f, expr, right_edge(expr) < prec::LIKE);
297 f.write_str(" ");
298 if *negated {
299 f.write_str("NOT ");
300 }
301 if *case_insensitive {
302 f.write_str("I");
303 }
304 f.write_str("LIKE ");
305 // The pattern and escape parse at `Like` precedence and sit to the
306 // right of the keyword, so an operand exposing a precedence at or
307 // below `Like` on its left spine (e.g. an `IN`/`LIKE`/`BETWEEN` at
308 // equal precedence) re-associates unless parenthesized.
309 // `a LIKE b IN (q)` parses as `(a LIKE b) IN (q)`. When an `ESCAPE`
310 // follows, the pattern is *also* immediately left of `ESCAPE`: a
311 // `[I]LIKE` exposed on the pattern's right spine would steal the
312 // `ESCAPE` as its own (`a LIKE NOT b LIKE c ESCAPE d` parses the
313 // escape onto the inner `b LIKE c`), so guard the right edge too.
314 let pattern_parens = left_edge(pattern) <= prec::LIKE
315 || (escape.is_some() && right_edge(pattern) <= prec::LIKE);
316 write_binary_operand(f, pattern, pattern_parens);
317 if let Some(escape) = escape {
318 f.write_str(" ESCAPE ");
319 write_binary_operand(f, escape, left_edge(escape) <= prec::LIKE);
320 }
321 }
322 Expr::Between {
323 expr,
324 negated,
325 low,
326 high,
327 } => {
328 // The subject is the LHS of the `BETWEEN` infix (parsed at
329 // `Like`). A spine exposed at or below `Like` on its right would
330 // pull `BETWEEN` inside it (`a OR b BETWEEN …` is `a OR (b BETWEEN
331 // …)`), so parenthesize via `right_edge`. Parser ASTs wrap such a
332 // subject in `Nested` (`ATOM`), so they're unaffected.
333 write_binary_operand(f, expr, right_edge(expr) < prec::LIKE);
334 if *negated {
335 f.write_str(" NOT");
336 }
337 f.write_str(" BETWEEN ");
338 write_between_bound(f, low);
339 f.write_str(" AND ");
340 write_between_bound(f, high);
341 }
342 Expr::Op { op, expr1, expr2 } => {
343 if let Some(expr2) = expr2 {
344 // Binary operators are left-associative: parenthesize an
345 // operand that would re-associate once `Nested` is stripped.
346 // The left by its `right_edge` (strictly looser than `op`), the
347 // right by its `left_edge` (equal-or-looser, as equal
348 // re-associates left). See those helpers for the spine
349 // reasoning.
350 let p = binary_op_precedence(op);
351 write_binary_operand(f, expr1, right_edge(expr1) < p);
352 f.write_str(" ");
353 f.write_str(op);
354 f.write_str(" ");
355 write_binary_operand(f, expr2, left_edge(expr2) <= p);
356 } else {
357 f.write_str(op);
358 f.write_str(" ");
359 // A prefix operator binds tighter than `COLLATE` and the
360 // binary operators but looser than the postfix `::`/`[…]`
361 // forms, and `- <number>` lexes as a negative literal, so a
362 // low-precedence or numeric-leftmost operand must be
363 // parenthesized to keep the prefix operator's scope.
364 if prefix_operand_needs_parens(expr1.as_ref()) {
365 f.write_str("(");
366 f.write_node(&expr1);
367 f.write_str(")");
368 } else {
369 f.write_node(&expr1);
370 }
371 }
372 }
373 Expr::Cast { expr, data_type } => {
374 // `::` binds very tightly, so a non-self-delimiting operand must
375 // be parenthesized or the cast re-associates into its spine.
376 // `CAST(-0 AS int4)` (i.e. `Cast(- 0)`) would otherwise print as
377 // `- 0::int4` and reparse as `- (0::int4)`. The parser wraps such
378 // operands in `Expr::Nested`, but `normalize` strips those, so the
379 // printer must re-add them (mirroring the `Collate` arm). `Nested`
380 // is itself self-delimiting, so parser-produced ASTs don't double up.
381 if prints_self_delimiting(expr) {
382 f.write_node(&expr);
383 } else {
384 f.write_str("(");
385 f.write_node(&expr);
386 f.write_str(")");
387 }
388 f.write_str("::");
389 f.write_node(data_type);
390 }
391 Expr::Collate { expr, collation } => {
392 // `COLLATE` binds very tightly (`PostfixCollateAt`), so a
393 // low-precedence operand must be parenthesized or the collation
394 // re-associates onto its rightmost sub-operand — `a + b COLLATE c`
395 // would reparse as `a + (b COLLATE c)`. (Round-trip parens are
396 // stripped by `normalize`, so the printer must re-add them.)
397 if prints_self_delimiting(expr) {
398 f.write_node(&expr);
399 } else {
400 f.write_str("(");
401 f.write_node(&expr);
402 f.write_str(")");
403 }
404 f.write_str(" COLLATE ");
405 f.write_node(&collation);
406 }
407 Expr::HomogenizingFunction { function, exprs } => {
408 f.write_node(function);
409 f.write_str("(");
410 f.write_node(&display::comma_separated(exprs));
411 f.write_str(")");
412 }
413 Expr::NullIf { l_expr, r_expr } => {
414 f.write_str("NULLIF(");
415 f.write_node(&display::comma_separated(&[l_expr, r_expr]));
416 f.write_str(")");
417 }
418 Expr::Nested(ast) => {
419 f.write_str("(");
420 f.write_node(&ast);
421 f.write_str(")");
422 }
423 Expr::Row { exprs } => {
424 f.write_str("ROW(");
425 f.write_node(&display::comma_separated(exprs));
426 f.write_str(")");
427 }
428 Expr::Value(v) => {
429 f.write_node(v);
430 }
431 Expr::Function(fun) => {
432 f.write_node(fun);
433 }
434 Expr::Case {
435 operand,
436 conditions,
437 results,
438 else_result,
439 } => {
440 f.write_str("CASE");
441 if let Some(operand) = operand {
442 f.write_str(" ");
443 f.write_node(&operand);
444 }
445 for (c, r) in conditions.iter().zip_eq(results) {
446 f.write_str(" WHEN ");
447 f.write_node(c);
448 f.write_str(" THEN ");
449 f.write_node(r);
450 }
451
452 if let Some(else_result) = else_result {
453 f.write_str(" ELSE ");
454 f.write_node(&else_result);
455 }
456 f.write_str(" END")
457 }
458 Expr::Exists(s) => {
459 f.write_str("EXISTS (");
460 f.write_node(&s);
461 f.write_str(")");
462 }
463 Expr::Subquery(s) => {
464 f.write_str("(");
465 f.write_node(&s);
466 f.write_str(")");
467 }
468 Expr::AnySubquery { left, op, right } => {
469 write_quantified_left(f, left, op);
470 f.write_str(" ");
471 f.write_str(op);
472 f.write_str(" ANY (");
473 f.write_node(&right);
474 f.write_str(")");
475 }
476 Expr::AnyExpr { left, op, right } => {
477 write_quantified_left(f, left, op);
478 f.write_str(" ");
479 f.write_str(op);
480 f.write_str(" ANY (");
481 f.write_node(&right);
482 f.write_str(")");
483 }
484 Expr::AllSubquery { left, op, right } => {
485 write_quantified_left(f, left, op);
486 f.write_str(" ");
487 f.write_str(op);
488 f.write_str(" ALL (");
489 f.write_node(&right);
490 f.write_str(")");
491 }
492 Expr::AllExpr { left, op, right } => {
493 write_quantified_left(f, left, op);
494 f.write_str(" ");
495 f.write_str(op);
496 f.write_str(" ALL (");
497 f.write_node(&right);
498 f.write_str(")");
499 }
500 Expr::Array(exprs) => {
501 f.write_str("ARRAY[");
502 f.write_node(&display::comma_separated(exprs));
503 f.write_str("]");
504 }
505 Expr::ArraySubquery(s) => {
506 f.write_str("ARRAY(");
507 f.write_node(&s);
508 f.write_str(")");
509 }
510 Expr::List(exprs) => {
511 f.write_str("LIST[");
512 f.write_node(&display::comma_separated(exprs));
513 f.write_str("]");
514 }
515 Expr::ListSubquery(s) => {
516 f.write_str("LIST(");
517 f.write_node(&s);
518 f.write_str(")");
519 }
520 Expr::Map(exprs) => {
521 f.write_str("MAP[");
522 f.write_node(&display::comma_separated(exprs));
523 f.write_str("]");
524 }
525 Expr::MapSubquery(s) => {
526 f.write_str("MAP(");
527 f.write_node(&s);
528 f.write_str(")");
529 }
530 Expr::Subscript { expr, positions } => {
531 write_subscript_receiver(f, expr);
532 f.write_str("[");
533
534 let mut first = true;
535
536 for p in positions {
537 if first {
538 first = false
539 } else {
540 f.write_str("][");
541 }
542 f.write_node(p);
543 }
544
545 f.write_str("]");
546 }
547 }
548 }
549}
550impl_display_t!(Expr);
551
552/// Write `expr` as the receiver of a `.` operator (used by `FieldAccess` and
553/// `WildcardAccess`), parenthesizing when the receiver could re-bind the
554/// trailing dot on reparse. The `.` token has very high precedence and both
555/// the lexer and parser greedily extend adjacent tokens: `1.x` tokenizes the
556/// number `1.` and leaves `x` as an alias, and `'a'::T.x` consumes `T.x` as a
557/// qualified type name. The whitelist below covers receivers that print as
558/// self-terminating syntax (parenthesized exprs, function calls, bracketed
559/// collections, etc.). Anything else gets explicit parens.
560///
561/// A bare `Identifier`/`QualifiedWildcard` receiver is *not* safe: `a` then
562/// `.b`/`.*` prints as `a.b`/`a.*`, which reparses as the qualified identifier
563/// `Identifier([a, b])` / `QualifiedWildcard([a])` rather than a field/wildcard
564/// access. The parser only ever builds those accesses over a parenthesized
565/// receiver (`(a).b`), so it wraps the name in `Expr::Nested`. A bare name here
566/// is a `Nested`-stripped AST and must be re-parenthesized. A `FieldAccess` /
567/// `WildcardAccess` receiver *is* safe, because its own printing already
568/// parenthesizes a bare-name base (`(a).b.c`), so the chain stays self-delimiting.
569///
570/// The quantified-subquery forms (`AnySubquery`/`AllSubquery`, printed
571/// `<expr> <op> ANY (<query>)`) are likewise *not* safe: they end in a `(query)`
572/// that is only a sub-part, so a trailing `.x`/`.*` binds to that inner subquery
573/// rather than the whole expression. (Contrast `Subquery`/`ArraySubquery`/… which
574/// are a single `(…)`/`ARRAY(…)` primary, so a trailing dot attaches to the whole
575/// thing.)
576fn write_dot_receiver<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr: &Expr<T>) {
577 let safe = matches!(
578 expr,
579 Expr::FieldAccess { .. }
580 | Expr::WildcardAccess(_)
581 | Expr::Parameter(_)
582 | Expr::Nested(_)
583 | Expr::Row { .. }
584 | Expr::Function(_)
585 | Expr::Case { .. }
586 | Expr::Exists(_)
587 | Expr::Subquery(_)
588 | Expr::Array(_)
589 | Expr::ArraySubquery(_)
590 | Expr::List(_)
591 | Expr::ListSubquery(_)
592 | Expr::Map(_)
593 | Expr::MapSubquery(_)
594 | Expr::Subscript { .. }
595 | Expr::HomogenizingFunction { .. }
596 | Expr::NullIf { .. }
597 | Expr::Value(
598 Value::String(_)
599 | Value::Boolean(_)
600 | Value::Null
601 | Value::HexString(_)
602 | Value::Interval(_)
603 )
604 );
605 if safe {
606 f.write_node(expr);
607 } else {
608 f.write_str("(");
609 f.write_node(expr);
610 f.write_str(")");
611 }
612}
613
614/// Write `left` as the LHS of `<left> <op> ANY/ALL (...)`. The printed `<op>` is an
615/// ordinary binary infix. It can be any operator the parser accepts here, from
616/// `=`/`<` (`Cmp`) all the way down to `*`/`/`/`%` (`MultiplyDivide`), and on
617/// reparse it binds into any operator exposed on `left`'s right spine that is
618/// *strictly looser* than `<op>` itself, stealing that suffix into the quantified
619/// expression's left rather than wrapping the whole `left`. So parenthesize
620/// exactly when `left`'s [`right_edge`] binds looser than `<op>`'s own precedence
621/// ([`binary_op_precedence`]), mirroring the binary-`Op` arm. Using the operator's
622/// real precedence (not a fixed `Like` threshold) both parenthesizes a
623/// tighter-binding `<op>` over a looser left and leaves an equal-or-tighter left
624/// bare (`a = b = ANY (…)`, `a LIKE b = ANY (…)`), which the old fixed threshold
625/// over-parenthesized. The tighter-binding case, `(a + b) * ANY (…)`, would
626/// otherwise print `a + b * ANY (…)` and reparse as the different
627/// `a + (b * ANY (…))`. [`right_edge`] also sees a looser spine hidden under
628/// right-transparent prefixes, e.g. the `NOT`'s `IN` in `- NOT a IN (b) = ANY (…)`.
629fn write_quantified_left<W: fmt::Write, T: AstInfo>(
630 f: &mut AstFormatter<W>,
631 expr: &Expr<T>,
632 op: &Op,
633) {
634 let needs_parens = right_edge(expr) < binary_op_precedence(op);
635 if needs_parens {
636 f.write_str("(");
637 f.write_node(expr);
638 f.write_str(")");
639 } else {
640 f.write_node(expr);
641 }
642}
643
644/// Write `bound` as a `BETWEEN … AND …` bound. The parser parses both bounds with
645/// `parse_subexpr(Precedence::Like)` (see `Parser::parse_between`), starting fresh
646/// with nothing to the bound's left, so it walks the bound's *left spine* and
647/// stops at the first operator binding at or below `Like`, leaving that operator
648/// outside the bound (`x BETWEEN 1 IS NULL AND y` parses `1` as the bound, then
649/// expects `AND` but finds `IS`). A bound is therefore safe bare only when its
650/// left edge binds strictly above `Like`. Use [`left_edge`] (not [`right_edge`],
651/// which closes at `ATOM` for the right-closing `IS NULL`/`= ANY (…)`/`IN (…)`
652/// forms whose looseness is on the left). The parser wraps these bounds in
653/// `Expr::Nested` (which is `ATOM`, so it prints bare). This re-adds the parens
654/// for ASTs where that wrapper is absent.
655fn write_between_bound<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, bound: &Expr<T>) {
656 let needs_parens = left_edge(bound) <= prec::LIKE;
657 if needs_parens {
658 f.write_str("(");
659 f.write_node(bound);
660 f.write_str(")");
661 } else {
662 f.write_node(bound);
663 }
664}
665
666/// Output-precedence ranks, derived directly from the parser's [`Precedence`]
667/// ladder (higher binds tighter) so it stays the single source of truth:
668/// reordering or inserting a parser level reranks these automatically, and only
669/// the variant each rank maps to is maintained by hand. They classify the *top*
670/// operator an expr prints with, so the binary-operator printer can parenthesize
671/// an operand that would otherwise re-associate on reparse. `ATOM`, the one rank
672/// with no parser counterpart, is layered one above the tightest parser level to
673/// mark the self-delimiting primaries. They never need parens.
674///
675/// [`Precedence`]: crate::parser::Precedence
676// `Precedence` is a fieldless enum with a handful of variants, so reading each
677// discriminant with `as u8` is exact and lossless.
678#[allow(clippy::as_conversions)]
679mod prec {
680 use crate::parser::Precedence;
681
682 pub const OR: u8 = Precedence::Or as u8;
683 pub const AND: u8 = Precedence::And as u8;
684 pub const NOT: u8 = Precedence::PrefixNot as u8;
685 pub const IS: u8 = Precedence::Is as u8;
686 pub const CMP: u8 = Precedence::Cmp as u8;
687 pub const LIKE: u8 = Precedence::Like as u8;
688 pub const OTHER: u8 = Precedence::Other as u8;
689 pub const PLUS_MINUS: u8 = Precedence::PlusMinus as u8;
690 pub const MULTIPLY_DIVIDE: u8 = Precedence::MultiplyDivide as u8;
691 // The `COLLATE` and postfix (`::`/`[…]`) parser levels live between
692 // `MULTIPLY_DIVIDE` and `ATOM`, but neither edge function ever *returns* them:
693 // those forms are self-delimiting (their own operand is parenthesized when it
694 // isn't), so both their edges rank `ATOM`. Kept for parity with the ladder.
695 #[allow(dead_code)]
696 pub const COLLATE: u8 = Precedence::PostfixCollateAt as u8;
697 pub const PREFIX: u8 = Precedence::PrefixPlusMinus as u8;
698 #[allow(dead_code)]
699 pub const POSTFIX: u8 = Precedence::PostfixSubscriptCast as u8;
700 pub const ATOM: u8 = Precedence::PostfixSubscriptCast as u8 + 1;
701}
702
703/// The precedence of a binary operator, mirroring `Parser::get_next_precedence`.
704/// A namespaced `OPERATOR(...)` binds at `OTHER`, like the parser.
705fn binary_op_precedence(op: &Op) -> u8 {
706 if op.namespace.is_some() {
707 return prec::OTHER;
708 }
709 match op.op.as_str() {
710 "=" | "<" | "<=" | "<>" | "!=" | ">" | ">=" => prec::CMP,
711 "+" | "-" => prec::PLUS_MINUS,
712 "*" | "/" | "%" => prec::MULTIPLY_DIVIDE,
713 _ => prec::OTHER,
714 }
715}
716
717/// The precedence at which a prefix operator (`Op` with no second operand)
718/// parses its operand, mirroring `Parser::parse_prefix`: `-`/`+` at
719/// `PrefixPlusMinus`, but `~` (and namespaced prefixes) at `Other`, so `~ a + b`
720/// parses as `~ (a + b)`. `~` binds looser than `+`/`-`/`*`.
721fn unary_prec(op: &Op) -> u8 {
722 if op.namespace.is_none() && (op.op == "-" || op.op == "+") {
723 prec::PREFIX
724 } else {
725 prec::OTHER
726 }
727}
728
729/// The loosest precedence exposed on `expr`'s *right spine*, the precedence at
730/// which an operator printed immediately to its right would bind *into* it
731/// rather than wrap it. For a left operand / subject of a construct that prints
732/// to its right, this is what decides parenthesization (its mirror, [`left_edge`],
733/// decides right operands), because a prefix operator and the right operand of a
734/// binary/`BETWEEN`/`LIKE`/`IS DISTINCT FROM` are right-transparent:
735/// `- NOT a IN (b)` exposes the `NOT`'s `IN` on the right even though its top node
736/// is unary `-`. Forms that close with a bracket on the right (`(…)`, `[…]`,
737/// `::type`, `IS NULL`) are `ATOM`.
738fn right_edge<T: AstInfo>(expr: &Expr<T>) -> u8 {
739 match expr {
740 // Right-transparent binary infixes: an operator tighter than this one
741 // binds into the right operand, which itself may expose a looser spine.
742 Expr::Or { right, .. } => prec::OR.min(right_edge(right)),
743 Expr::And { right, .. } => prec::AND.min(right_edge(right)),
744 Expr::Op {
745 op, expr2: Some(r), ..
746 } => binary_op_precedence(op).min(right_edge(r)),
747 // Prefix operators expose their operand's right spine.
748 Expr::Op {
749 op,
750 expr1,
751 expr2: None,
752 } => unary_prec(op).min(right_edge(expr1)),
753 Expr::Not { expr } => prec::NOT.min(right_edge(expr)),
754 // `IS DISTINCT FROM x` exposes `x`, while `IS NULL`/`TRUE`/… close.
755 Expr::IsExpr {
756 construct: IsExprConstruct::DistinctFrom(x),
757 ..
758 } => prec::IS.min(right_edge(x)),
759 // `… BETWEEN low AND high` exposes `high`. `… [I]LIKE pat [ESCAPE esc]`
760 // exposes the rightmost of `esc`/`pat`.
761 Expr::Between { high, .. } => prec::LIKE.min(right_edge(high)),
762 Expr::Like {
763 pattern, escape, ..
764 } => {
765 let rightmost = escape.as_deref().unwrap_or_else(|| pattern.as_ref());
766 prec::LIKE.min(right_edge(rightmost))
767 }
768 // Everything else closes on the right (a bracket, a keyword, a literal,
769 // or `IS NULL`-style), so nothing binds into it.
770 _ => prec::ATOM,
771 }
772}
773
774/// The loosest precedence exposed on `expr`'s *left spine*, the mirror of
775/// [`right_edge`]. For a *right* operand (an operator on its left), this is what
776/// decides parenthesization: a left-associative operator printed to its left
777/// reaches into the left spine and re-associates if that spine exposes a
778/// precedence at or below the operator's. The top operator alone is not enough,
779/// because a left-nested chain can bury a looser operator down its left edge:
780/// `387 = ANY (...) LIKE a IN (...)` has a top `IN` (`Like`) but exposes the
781/// `= ANY` (`Cmp`) on its left, so a tighter `<>` to its left
782/// (`48 <> 387 = ANY (...) ...`) would steal the `<>` into the `= ANY`'s left
783/// rather than leave it as the `<>`'s right operand. Forms that open with their
784/// own token on the left (a prefix operator, a keyword, `(…)`, a literal) are
785/// `ATOM`.
786fn left_edge<T: AstInfo>(expr: &Expr<T>) -> u8 {
787 match expr {
788 // Left-transparent infixes / postfix-keyword constructs: the subject (or
789 // left operand) sits on the left spine, so descend into it.
790 Expr::Or { left, .. } => prec::OR.min(left_edge(left)),
791 Expr::And { left, .. } => prec::AND.min(left_edge(left)),
792 Expr::Op {
793 op,
794 expr1,
795 expr2: Some(_),
796 } => binary_op_precedence(op).min(left_edge(expr1)),
797 Expr::IsExpr { expr, .. } => prec::IS.min(left_edge(expr)),
798 Expr::AnyExpr { left, .. }
799 | Expr::AllExpr { left, .. }
800 | Expr::AnySubquery { left, .. }
801 | Expr::AllSubquery { left, .. } => prec::CMP.min(left_edge(left)),
802 Expr::Like { expr, .. }
803 | Expr::Between { expr, .. }
804 | Expr::InList { expr, .. }
805 | Expr::InSubquery { expr, .. } => prec::LIKE.min(left_edge(expr)),
806 // Everything else leads with its own token on the left: a prefix
807 // operator (`-`/`+`/`~`/`NOT`), a keyword, `(…)`, `ARRAY[…]`, a literal,
808 // or a `COLLATE`/`::`/`[…]` whose own operand the printer parenthesizes
809 // when it isn't self-delimiting. Nothing to the left binds into it.
810 _ => prec::ATOM,
811 }
812}
813
814/// Write `operand` for a binary operator, parenthesizing it iff `needs_parens`.
815fn write_binary_operand<W: fmt::Write, T: AstInfo>(
816 f: &mut AstFormatter<W>,
817 operand: &Expr<T>,
818 needs_parens: bool,
819) {
820 if needs_parens {
821 f.write_str("(");
822 f.write_node(operand);
823 f.write_str(")");
824 } else {
825 f.write_node(operand);
826 }
827}
828
829/// Whether `expr` prints in a *self-delimiting* form — atomic, or wrapped in its
830/// own brackets/parens (`name(...)`, `(…)`, `ARRAY[…]`, `CASE … END`, …) — so it
831/// is safe to print immediately to the left of a tight postfix operator (`::`,
832/// `COLLATE`, or the `IN` delimiter of the `position(<needle> IN …)` special
833/// form) without the operator re-associating into the expression's spine.
834///
835/// Anything with an exposed operator spine is *not* self-delimiting: a tight
836/// postfix would bind to its rightmost sub-operand (`a + b COLLATE c` parses as
837/// `a + (b COLLATE c)`), and the `position` `IN` delimiter would split on an
838/// inner `IN`/comparison (`a IN (q) ->> b`). Callers must parenthesize / fall
839/// back for those. Postfix forms (`::`/`COLLATE`/`[…]`) are self-delimiting only
840/// when their own inner operand is.
841fn prints_self_delimiting<T: AstInfo>(expr: &Expr<T>) -> bool {
842 match expr {
843 Expr::Value(_)
844 | Expr::Identifier(_)
845 | Expr::QualifiedWildcard(_)
846 | Expr::Parameter(_)
847 | Expr::Function(_)
848 | Expr::HomogenizingFunction { .. }
849 | Expr::NullIf { .. }
850 | Expr::Subquery(_)
851 | Expr::Exists(_)
852 | Expr::Nested(_)
853 | Expr::Array(_)
854 | Expr::ArraySubquery(_)
855 | Expr::List(_)
856 | Expr::ListSubquery(_)
857 | Expr::Map(_)
858 | Expr::MapSubquery(_)
859 | Expr::Case { .. }
860 | Expr::Row { .. } => true,
861 // The postfix `::` / `COLLATE` / `[…]` forms print as `<inner><suffix>`,
862 // so they are safe only when their inner operand is.
863 Expr::Cast { expr, .. } | Expr::Collate { expr, .. } | Expr::Subscript { expr, .. } => {
864 prints_self_delimiting(expr)
865 }
866 _ => false,
867 }
868}
869
870/// Whether the operand of a prefix operator (`-`/`+`/`~`) must be parenthesized
871/// to round-trip. A prefix op binds *tighter* than `COLLATE`/`AT TIME ZONE` and
872/// the binary/comparison operators, but *looser* than the postfix `::`/`[…]`
873/// forms — and `- <number>` additionally lexes as a negative literal. So peel
874/// the tight postfixes (`::`/`[…]`); if the chain bottoms out at a numeric
875/// literal the sign would fold into it, and if it bottoms out at anything other
876/// than a self-delimiting non-`COLLATE` primary (a `COLLATE`, a binary op, …) the
877/// prefix op would re-associate — both need parens. (`a + b COLLATE c` reparses
878/// as `a + (b COLLATE c)`; `- x COLLATE c` as `(- x) COLLATE c`.)
879fn prefix_operand_needs_parens<T: AstInfo>(operand: &Expr<T>) -> bool {
880 let mut e = operand;
881 let mut saw_postfix = false;
882 loop {
883 match e {
884 Expr::Cast { expr, .. } | Expr::Subscript { expr, .. } => {
885 saw_postfix = true;
886 e = expr.as_ref();
887 }
888 Expr::Value(Value::Number(_)) => return saw_postfix,
889 // Another prefix operator (`+ + x`, `- ~ x`, `NOT NOT x`) stacks
890 // directly: prefix operators don't re-associate, and the inner
891 // operator symbol sits between the outer one and any digit so there
892 // is no `- <number>` fold. Always safe — and crucially, NOT adding
893 // parens here keeps deep unary chains from exploding the nesting
894 // depth (and overflowing the stack) on reparse.
895 Expr::Op { expr2: None, .. } | Expr::Not { .. } => return false,
896 // Self-delimiting, but a top-level `COLLATE` binds looser than the
897 // prefix op, so it (unlike `::`/`[…]`) is not safe here.
898 _ => return !(prints_self_delimiting(e) && !matches!(e, Expr::Collate { .. })),
899 }
900 }
901}
902
903/// Write `expr` as the receiver of a `[…]` subscript. An unparenthesized
904/// `Identifier(["map"])` reparses as `Token::Keyword(MAP)` followed by `[`,
905/// which dispatches to `parse_map` (the map-literal grammar) instead of a
906/// regular subscript. Parenthesize identifiers whose last component is a
907/// context-sensitive keyword so the round trip stays an identifier subscript.
908fn write_subscript_receiver<W: fmt::Write, T: AstInfo>(f: &mut AstFormatter<W>, expr: &Expr<T>) {
909 let needs_parens = match expr {
910 // A bare keyword identifier (`map`, `list`, …) dispatches to the
911 // map/list-literal grammar before `[`, so it needs parens even though
912 // identifiers are otherwise safe receivers.
913 Expr::Identifier(idents) => idents
914 .last()
915 .and_then(|id| id.as_keyword())
916 .map(|kw| kw.is_context_sensitive_keyword())
917 .unwrap_or(false),
918 // Self-delimiting primaries, the bracketed collections, and the postfix
919 // forms that end in an identifier or `)` are safe: a following `[…]`
920 // attaches to the whole receiver as a fresh subscript.
921 Expr::QualifiedWildcard(_)
922 | Expr::Parameter(_)
923 | Expr::Value(_)
924 | Expr::Function(_)
925 | Expr::HomogenizingFunction { .. }
926 | Expr::NullIf { .. }
927 | Expr::Nested(_)
928 | Expr::Subquery(_)
929 | Expr::Exists(_)
930 | Expr::Case { .. }
931 | Expr::Row { .. }
932 | Expr::Array(_)
933 | Expr::ArraySubquery(_)
934 | Expr::List(_)
935 | Expr::ListSubquery(_)
936 | Expr::Map(_)
937 | Expr::MapSubquery(_)
938 | Expr::FieldAccess { .. }
939 | Expr::WildcardAccess(_)
940 | Expr::Collate { .. } => false,
941 // `Cast`: the type parser swallows a following `[…]` as an array suffix
942 // (`a::int4[1]` is `a` cast to `int4[]`, not a subscript of `a::int4`).
943 // `Subscript`: consecutive `[…]` flatten into one node (`a[1][2]` is a
944 // single subscript), so a nested subscript receiver must be parenthesized
945 // to stay nested. Everything else (operators, `IS`/`LIKE`/… constructs)
946 // binds looser than `[` and would re-associate, so parenthesize by default.
947 _ => true,
948 };
949 if needs_parens {
950 f.write_str("(");
951 f.write_node(expr);
952 f.write_str(")");
953 } else {
954 f.write_node(expr);
955 }
956}
957
958impl<T: AstInfo> Expr<T> {
959 pub fn null() -> Expr<T> {
960 Expr::Value(Value::Null)
961 }
962
963 pub fn number<S>(n: S) -> Expr<T>
964 where
965 S: Into<String>,
966 {
967 Expr::Value(Value::Number(n.into()))
968 }
969
970 pub fn negate(self) -> Expr<T> {
971 Expr::Not {
972 expr: Box::new(self),
973 }
974 }
975
976 pub fn and(self, right: Expr<T>) -> Expr<T> {
977 Expr::And {
978 left: Box::new(self),
979 right: Box::new(right),
980 }
981 }
982
983 pub fn or(self, right: Expr<T>) -> Expr<T> {
984 Expr::Or {
985 left: Box::new(self),
986 right: Box::new(right),
987 }
988 }
989
990 pub fn binop(self, op: Op, right: Expr<T>) -> Expr<T> {
991 Expr::Op {
992 op,
993 expr1: Box::new(self),
994 expr2: Some(Box::new(right)),
995 }
996 }
997
998 pub fn lt(self, right: Expr<T>) -> Expr<T> {
999 self.binop(Op::bare("<"), right)
1000 }
1001
1002 pub fn lt_eq(self, right: Expr<T>) -> Expr<T> {
1003 self.binop(Op::bare("<="), right)
1004 }
1005
1006 pub fn gt(self, right: Expr<T>) -> Expr<T> {
1007 self.binop(Op::bare(">"), right)
1008 }
1009
1010 pub fn gt_eq(self, right: Expr<T>) -> Expr<T> {
1011 self.binop(Op::bare(">="), right)
1012 }
1013
1014 pub fn equals(self, right: Expr<T>) -> Expr<T> {
1015 self.binop(Op::bare("="), right)
1016 }
1017
1018 pub fn not_equals(self, right: Expr<T>) -> Expr<T> {
1019 self.binop(Op::bare("<>"), right)
1020 }
1021
1022 pub fn minus(self, right: Expr<T>) -> Expr<T> {
1023 self.binop(Op::bare("-"), right)
1024 }
1025
1026 pub fn multiply(self, right: Expr<T>) -> Expr<T> {
1027 self.binop(Op::bare("*"), right)
1028 }
1029
1030 pub fn modulo(self, right: Expr<T>) -> Expr<T> {
1031 self.binop(Op::bare("%"), right)
1032 }
1033
1034 pub fn divide(self, right: Expr<T>) -> Expr<T> {
1035 self.binop(Op::bare("/"), right)
1036 }
1037
1038 pub fn cast(self, data_type: T::DataType) -> Expr<T> {
1039 Expr::Cast {
1040 expr: Box::new(self),
1041 data_type,
1042 }
1043 }
1044
1045 pub fn call(name: T::ItemName, args: Vec<Expr<T>>) -> Expr<T> {
1046 Expr::Function(Function {
1047 name,
1048 args: FunctionArgs::args(args),
1049 filter: None,
1050 over: None,
1051 distinct: false,
1052 })
1053 }
1054
1055 pub fn call_nullary(name: T::ItemName) -> Expr<T> {
1056 Expr::call(name, vec![])
1057 }
1058
1059 pub fn call_unary(self, name: T::ItemName) -> Expr<T> {
1060 Expr::call(name, vec![self])
1061 }
1062
1063 pub fn take(&mut self) -> Expr<T> {
1064 mem::replace(self, Expr::Identifier(vec![]))
1065 }
1066}
1067
/// A reference to an operator.
///
/// Displayed as the bare operator text when `namespace` is `None`, and as
/// `OPERATOR(<ns>.<ns>.<op>)` when a namespace is present.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Op {
    /// Any namespaces that preceded the operator. `None` means the operator
    /// was written without the `OPERATOR(...)` wrapper.
    pub namespace: Option<Vec<Ident>>,
    /// The operator itself, e.g. `+` or `<=`.
    pub op: String,
}
1076
1077impl AstDisplay for Op {
1078 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1079 if let Some(namespace) = &self.namespace {
1080 f.write_str("OPERATOR(");
1081 for name in namespace {
1082 f.write_node(name);
1083 f.write_str(".");
1084 }
1085 f.write_str(&self.op);
1086 f.write_str(")");
1087 } else {
1088 f.write_str(&self.op)
1089 }
1090 }
1091}
1092impl_display!(Op);
1093
1094impl Op {
1095 /// Constructs a new unqualified operator reference.
1096 pub fn bare<S>(op: S) -> Op
1097 where
1098 S: Into<String>,
1099 {
1100 Op {
1101 namespace: None,
1102 op: op.into(),
1103 }
1104 }
1105}
1106
/// Which function an `Expr::HomogenizingFunction` node refers to. Each
/// variant is displayed as the corresponding upper-case keyword.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum HomogenizingFunction {
    /// Displayed as `COALESCE`.
    Coalesce,
    /// Displayed as `GREATEST`.
    Greatest,
    /// Displayed as `LEAST`.
    Least,
}
1113
1114impl AstDisplay for HomogenizingFunction {
1115 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1116 match self {
1117 HomogenizingFunction::Coalesce => f.write_str("COALESCE"),
1118 HomogenizingFunction::Greatest => f.write_str("GREATEST"),
1119 HomogenizingFunction::Least => f.write_str("LEAST"),
1120 }
1121 }
1122}
1123impl_display!(HomogenizingFunction);
1124
/// A single `key => value` pair, displayed in exactly that form.
// NOTE(review): presumably the element type of a map literal (`Expr::Map`);
// confirm against the `Expr` definition.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct MapEntry<T: AstInfo> {
    /// The expression printed to the left of `=>`.
    pub key: Expr<T>,
    /// The expression printed to the right of `=>`.
    pub value: Expr<T>,
}
1130
1131impl<T: AstInfo> AstDisplay for MapEntry<T> {
1132 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1133 f.write_node(&self.key);
1134 f.write_str(" => ");
1135 f.write_node(&self.value);
1136 }
1137}
1138impl_display_t!(MapEntry);
1139
/// One position inside a subscript: either a single index or a
/// `start:end` slice whose bounds may each be omitted.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SubscriptPosition<T: AstInfo> {
    /// The expression printed before the colon (or the sole index), if any.
    pub start: Option<Expr<T>>,
    /// The expression printed after the colon, if any. It is only displayed
    /// when `explicit_slice` is set.
    pub end: Option<Expr<T>>,
    // i.e. did this subscript include a colon
    pub explicit_slice: bool,
}
1147
1148impl<T: AstInfo> AstDisplay for SubscriptPosition<T> {
1149 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1150 if let Some(start) = &self.start {
1151 f.write_node(start);
1152 }
1153 if self.explicit_slice {
1154 f.write_str(":");
1155 if let Some(end) = &self.end {
1156 f.write_node(end);
1157 }
1158 }
1159 }
1160}
1161impl_display_t!(SubscriptPosition);
1162
/// A window specification (i.e. `OVER (PARTITION BY .. ORDER BY .. etc.)`)
/// Includes potential IGNORE NULLS or RESPECT NULLS from before the OVER clause.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct WindowSpec<T: AstInfo> {
    /// The `PARTITION BY` expressions; the clause is omitted on display when
    /// this is empty.
    pub partition_by: Vec<Expr<T>>,
    /// The `ORDER BY` expressions; the clause is omitted on display when this
    /// is empty.
    pub order_by: Vec<OrderByExpr<T>>,
    /// The optional frame clause (`ROWS ..` / `RANGE ..` / `GROUPS ..`).
    pub window_frame: Option<WindowFrame>,
    // Note that IGNORE NULLS and RESPECT NULLS are mutually exclusive. We validate that not both
    // are present during HIR planning.
    /// Whether ` IGNORE NULLS` is printed ahead of ` OVER (`.
    pub ignore_nulls: bool,
    /// Whether ` RESPECT NULLS` is printed ahead of ` OVER (`.
    pub respect_nulls: bool,
}
1175
1176impl<T: AstInfo> AstDisplay for WindowSpec<T> {
1177 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1178 if self.ignore_nulls {
1179 f.write_str(" IGNORE NULLS");
1180 }
1181 if self.respect_nulls {
1182 f.write_str(" RESPECT NULLS");
1183 }
1184 f.write_str(" OVER (");
1185 let mut delim = "";
1186 if !self.partition_by.is_empty() {
1187 delim = " ";
1188 f.write_str("PARTITION BY ");
1189 f.write_node(&display::comma_separated(&self.partition_by));
1190 }
1191 if !self.order_by.is_empty() {
1192 f.write_str(delim);
1193 delim = " ";
1194 f.write_str("ORDER BY ");
1195 f.write_node(&display::comma_separated(&self.order_by));
1196 }
1197 if let Some(window_frame) = &self.window_frame {
1198 if let Some(end_bound) = &window_frame.end_bound {
1199 f.write_str(delim);
1200 f.write_node(&window_frame.units);
1201 f.write_str(" BETWEEN ");
1202 f.write_node(&window_frame.start_bound);
1203 f.write_str(" AND ");
1204 f.write_node(&*end_bound);
1205 } else {
1206 f.write_str(delim);
1207 f.write_node(&window_frame.units);
1208 f.write_str(" ");
1209 f.write_node(&window_frame.start_bound);
1210 }
1211 }
1212 f.write_str(")");
1213 }
1214}
1215impl_display_t!(WindowSpec);
1216
/// Specifies the data processed by a window function, e.g.
/// `RANGE UNBOUNDED PRECEDING` or `ROWS BETWEEN 5 PRECEDING AND CURRENT ROW`.
///
/// Note: The parser does not validate the specified bounds; the caller should
/// reject invalid bounds like `ROWS UNBOUNDED FOLLOWING` before execution.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct WindowFrame {
    /// The frame unit keyword (`ROWS`, `RANGE`, or `GROUPS`).
    pub units: WindowFrameUnits,
    /// The left bound of the `BETWEEN .. AND` clause, or the only bound in
    /// the shorthand form.
    pub start_bound: WindowFrameBound,
    /// The right bound of the `BETWEEN .. AND` clause. The end bound of `None`
    /// indicates the shorthand form (e.g. `ROWS 1 PRECEDING`), which must
    /// behave the same as `end_bound = WindowFrameBound::CurrentRow`.
    pub end_bound: Option<WindowFrameBound>,
    // TBD: EXCLUDE
}
1232
/// The unit keyword that opens a window frame clause.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum WindowFrameUnits {
    /// Displayed as `ROWS`.
    Rows,
    /// Displayed as `RANGE`.
    Range,
    /// Displayed as `GROUPS`.
    Groups,
}
1239
1240impl AstDisplay for WindowFrameUnits {
1241 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1242 f.write_str(match self {
1243 WindowFrameUnits::Rows => "ROWS",
1244 WindowFrameUnits::Range => "RANGE",
1245 WindowFrameUnits::Groups => "GROUPS",
1246 })
1247 }
1248}
1249impl_display!(WindowFrameUnits);
1250
/// Specifies [WindowFrame]'s `start_bound` and `end_bound`.
///
/// In the `Preceding` and `Following` variants, `None` stands for
/// `UNBOUNDED` and `Some(n)` for the offset `n`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum WindowFrameBound {
    /// `CURRENT ROW`
    CurrentRow,
    /// `<N> PRECEDING` or `UNBOUNDED PRECEDING`
    Preceding(Option<u64>),
    /// `<N> FOLLOWING` or `UNBOUNDED FOLLOWING`.
    Following(Option<u64>),
}
1261
1262impl AstDisplay for WindowFrameBound {
1263 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1264 match self {
1265 WindowFrameBound::CurrentRow => f.write_str("CURRENT ROW"),
1266 WindowFrameBound::Preceding(None) => f.write_str("UNBOUNDED PRECEDING"),
1267 WindowFrameBound::Following(None) => f.write_str("UNBOUNDED FOLLOWING"),
1268 WindowFrameBound::Preceding(Some(n)) => {
1269 f.write_str(n);
1270 f.write_str(" PRECEDING");
1271 }
1272 WindowFrameBound::Following(Some(n)) => {
1273 f.write_str(n);
1274 f.write_str(" FOLLOWING");
1275 }
1276 }
1277 }
1278}
1279impl_display!(WindowFrameBound);
1280
/// A function call, together with the optional `DISTINCT`, `FILTER`, and
/// `OVER` modifiers that may accompany it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Function<T: AstInfo> {
    /// The name of the function being called.
    pub name: T::ItemName,
    /// The arguments: either `*` or a list with an optional `ORDER BY`.
    pub args: FunctionArgs<T>,
    // aggregate functions may specify e.g. `COUNT(DISTINCT X) FILTER (WHERE ...)`
    /// The predicate displayed as ` FILTER (WHERE <expr>)` after the call.
    pub filter: Option<Box<Expr<T>>>,
    /// The window specification displayed after the call (and after any
    /// `FILTER`), if present.
    pub over: Option<WindowSpec<T>>,
    // aggregate functions may specify eg `COUNT(DISTINCT x)`
    /// Whether `DISTINCT ` is printed ahead of the arguments.
    pub distinct: bool,
}
1292
1293impl<T: AstInfo> AstDisplay for Function<T> {
1294 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1295 self.fmt_call(f, true);
1296 }
1297}
1298
1299impl<T: AstInfo> Function<T> {
1300 /// Render this call in table-function position (`FROM f(...)`, `ROWS FROM
1301 /// (f(...))`), where the `extract(a FROM b)` / `position(a IN b)` special
1302 /// forms are *not* valid syntax — only the scalar-expression parser
1303 /// dispatches to them. Forces the plain comma form (with the name quoted
1304 /// to dodge the special grammar) so the round trip stays stable.
1305 pub(crate) fn fmt_table_call<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1306 self.fmt_call(f, false);
1307 }
1308
1309 fn fmt_call<W: fmt::Write>(&self, f: &mut AstFormatter<W>, allow_special_form: bool) {
1310 // This block handles printing function calls that have special parsing. In stable mode, the
1311 // name is quoted and so won't get the special parsing. We only need to print the special
1312 // formats in non-stable mode.
1313 //
1314 // The special forms (`position(a IN b)`, `extract(field FROM source)`)
1315 // have no syntax for `DISTINCT`, a within-group `ORDER BY`, a `FILTER`,
1316 // or an `OVER` window. A call literally named `"position"`/`"extract"`
1317 // that carries any of those modifiers (only reachable via the quoted
1318 // name — the real special grammar doesn't accept them) must therefore
1319 // fall through to the plain quoted-call form, or the special form
1320 // silently drops them on display.
1321 let has_call_modifiers = self.distinct
1322 || self.filter.is_some()
1323 || self.over.is_some()
1324 || matches!(&self.args, FunctionArgs::Args { order_by, .. } if !order_by.is_empty());
1325 if allow_special_form && !f.stable() && !has_call_modifiers {
1326 let special: Option<(&str, &[Option<Keyword>])> =
1327 match self.name.to_ast_string_stable().as_str() {
1328 // `extract(field FROM source)` parses `field` into a string
1329 // literal, so the special form only round-trips when arg0 is
1330 // a string. A generic `"extract"(a, b)` with a non-string
1331 // first arg must use the plain (quoted) call form.
1332 r#""extract""#
1333 if self.args.len() == Some(2)
1334 && matches!(self.args.first(), Some(Expr::Value(Value::String(_)))) =>
1335 {
1336 Some(("extract", &[None, Some(FROM)]))
1337 }
1338 // `position(<needle> IN <haystack>)` parses the needle at
1339 // `Precedence::Like`, so a low-precedence needle (`NOT`, a
1340 // comparison, `IS`, a boolean connective, a quantified
1341 // comparison, ...) printed bare before the `IN` would swallow
1342 // or stop short of the delimiter. Only use the special form
1343 // with a needle that's safe to sit left of `IN`.
1344 r#""position""#
1345 if self.args.len() == Some(2)
1346 && self.args.first().is_some_and(prints_self_delimiting) =>
1347 {
1348 Some(("position", &[None, Some(IN)]))
1349 }
1350
1351 // "trim" doesn't need to appear here because it changes the function name (to
1352 // "btrim", "ltrim", or "rtrim"), but only "trim" is parsed specially. "substring"
1353 // supports comma-delimited arguments, so doesn't need to be here.
1354 _ => None,
1355 };
1356 if let Some((name, kws)) = special {
1357 f.write_str(name);
1358 f.write_str("(");
1359 self.args.intersperse_function_argument_keywords(f, kws);
1360 f.write_str(")");
1361 return;
1362 }
1363 }
1364
1365 // If the function name clashes with a keyword that has its own special
1366 // parser form, an unquoted name on reparse would trigger the
1367 // special-grammar parser instead of a regular function call. Emit the
1368 // always-quoted stable form so the regular function-call path is
1369 // preserved. The list tracks the `(Token::Keyword(KW), Some(Token::LParen))`
1370 // dispatch in `parse_prefix` (array, coalesce, ...); add a new entry
1371 // whenever a keyword grows special-grammar parens. (The `ANY`/`ALL`/`SOME`
1372 // quantifier keywords are handled more generally by `can_be_printed_bare`,
1373 // since they're also unsafe as bare identifiers, e.g. `0 # some`.)
1374 let name_stable = self.name.to_ast_string_stable();
1375 let needs_quote_to_disambiguate = matches!(
1376 name_stable.as_str(),
1377 r#""array""#
1378 | r#""coalesce""#
1379 | r#""exists""#
1380 | r#""extract""#
1381 | r#""greatest""#
1382 | r#""least""#
1383 | r#""list""#
1384 | r#""map""#
1385 | r#""normalize""#
1386 | r#""nullif""#
1387 | r#""position""#
1388 | r#""row""#
1389 | r#""substring""#
1390 | r#""trim""#
1391 );
1392 if needs_quote_to_disambiguate {
1393 f.write_str(&name_stable);
1394 } else {
1395 f.write_node(&self.name);
1396 }
1397 f.write_str("(");
1398 if self.distinct {
1399 f.write_str("DISTINCT ")
1400 }
1401 f.write_node(&self.args);
1402 f.write_str(")");
1403 if let Some(filter) = &self.filter {
1404 f.write_str(" FILTER (WHERE ");
1405 f.write_node(&filter);
1406 f.write_str(")");
1407 }
1408 if let Some(o) = &self.over {
1409 f.write_node(o);
1410 }
1411 }
1412}
1413impl_display_t!(Function);
1414
/// Arguments for a function call.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum FunctionArgs<T: AstInfo> {
    /// The special star argument, as in `count(*)`.
    Star,
    /// A normal list of arguments.
    Args {
        /// The positional arguments, displayed comma-separated.
        args: Vec<Expr<T>>,
        /// An `ORDER BY` written inside the parentheses after the arguments;
        /// omitted on display when empty.
        order_by: Vec<OrderByExpr<T>>,
    },
}
1426
1427impl<T: AstInfo> FunctionArgs<T> {
1428 pub fn args(args: Vec<Expr<T>>) -> Self {
1429 Self::Args {
1430 args,
1431 order_by: vec![],
1432 }
1433 }
1434
1435 /// The first positional argument, if any (the `*` form has none).
1436 pub fn first(&self) -> Option<&Expr<T>> {
1437 match self {
1438 FunctionArgs::Star => None,
1439 FunctionArgs::Args { args, .. } => args.first(),
1440 }
1441 }
1442
1443 /// Returns the number of arguments. Star (`*`) is None.
1444 pub fn len(&self) -> Option<usize> {
1445 match self {
1446 FunctionArgs::Star => None,
1447 FunctionArgs::Args { args, .. } => Some(args.len()),
1448 }
1449 }
1450
1451 /// Prints associated keywords before each argument
1452 fn intersperse_function_argument_keywords<W: fmt::Write>(
1453 &self,
1454 f: &mut AstFormatter<W>,
1455 kws: &[Option<Keyword>],
1456 ) {
1457 let args = match self {
1458 FunctionArgs::Star => unreachable!(),
1459 FunctionArgs::Args { args, .. } => args,
1460 };
1461 soft_assert_eq_or_log!(args.len(), kws.len());
1462 let mut delim = "";
1463 for (arg, kw) in args.iter().zip_eq(kws) {
1464 if let Some(kw) = kw {
1465 f.write_str(delim);
1466 f.write_str(kw.as_str());
1467 delim = " ";
1468 }
1469 f.write_str(delim);
1470 f.write_node(arg);
1471 delim = " ";
1472 }
1473 }
1474}
1475
1476impl<T: AstInfo> AstDisplay for FunctionArgs<T> {
1477 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1478 match self {
1479 FunctionArgs::Star => f.write_str("*"),
1480 FunctionArgs::Args { args, order_by } => {
1481 f.write_node(&display::comma_separated(args));
1482 if !order_by.is_empty() {
1483 f.write_str(" ORDER BY ");
1484 f.write_node(&display::comma_separated(order_by));
1485 }
1486 }
1487 }
1488 }
1489}
1490impl_display_t!(FunctionArgs);
1491
/// The construct an expression is tested against.
// NOTE(review): presumably this is what follows `IS [NOT]` in an `IS`
// expression; confirm against the `Expr` variant that holds it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum IsExprConstruct<T: AstInfo> {
    /// Displayed as `NULL`.
    Null,
    /// Displayed as `TRUE`.
    True,
    /// Displayed as `FALSE`.
    False,
    /// Displayed as `UNKNOWN`.
    Unknown,
    /// Displayed as `DISTINCT FROM <expr>`.
    DistinctFrom(Box<Expr<T>>),
}
1500
1501impl<T: AstInfo> AstDisplay for IsExprConstruct<T> {
1502 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
1503 match self {
1504 IsExprConstruct::Null => f.write_str("NULL"),
1505 IsExprConstruct::True => f.write_str("TRUE"),
1506 IsExprConstruct::False => f.write_str("FALSE"),
1507 IsExprConstruct::Unknown => f.write_str("UNKNOWN"),
1508 IsExprConstruct::DistinctFrom(e) => {
1509 f.write_str("DISTINCT FROM ");
1510 e.fmt(f);
1511 }
1512 }
1513 }
1514}
1515impl_display_t!(IsExprConstruct);