mz_sql/plan/
scope.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Handles SQL's scoping rules.
11//!
12//! A scope spans a single SQL `Query`. Nested subqueries create new scopes.
13//! Names are resolved against the innermost scope first.
14//! * If a match is found, it is returned.
15//! * If no matches are found, the name is resolved against the parent scope.
16//! * If multiple matches are found, the name is ambiguous and we return an
17//!   error to the user.
18//!
19//! Matching rules:
20//! * `bar` will match any column in the scope named `bar`
21//! * `foo.bar` will match any column in the scope named `bar` that originated
22//!    from a table named `foo`.
23//! * Table aliases such as `foo AS quux` replace the old table name.
24//! * Functions create unnamed columns, which can be named with columns aliases
25//!   `(bar + 1) as more_bar`.
26//!
27//! Additionally, most databases fold some form of CSE into name resolution so
28//! that eg `SELECT sum(x) FROM foo GROUP BY sum(x)` would be treated something
29//! like `SELECT "sum(x)" FROM foo GROUP BY sum(x) AS "sum(x)"` rather than
30//! failing to resolve `x`. We handle this by including the underlying
31//! `sql_parser::ast::Expr` in cases where this is possible.
32//!
33//! Many SQL expressions do strange and arbitrary things to scopes. Rather than
34//! try to capture them all here, we just expose the internals of `Scope` and
35//! handle it in the appropriate place in `super::query`.
36//!
37//! NOTE(benesch): The above approach of exposing scope's internals to the
38//! entire planner has not aged well. SQL scopes are now full of undocumented
39//! assumptions and requirements, since various subcomponents of the planner
40//! shove data into scope items to communicate with subcomponents a mile away.
41//! I've tried to refactor this code several times to no avail. It works better
42//! than you might expect. But you have been warned. Tread carefully!
43
44use std::collections::BTreeSet;
45use std::iter;
46
47use mz_ore::iter::IteratorExt;
48use mz_repr::ColumnName;
49
50use crate::ast::Expr;
51use crate::names::{Aug, PartialItemName};
52use crate::plan::error::PlanError;
53use crate::plan::hir::ColumnRef;
54use crate::plan::plan_utils::JoinSide;
55
56#[derive(Debug, Clone)]
57pub struct ScopeItem {
58    /// The name of the table that produced this scope item, if any.
59    pub table_name: Option<PartialItemName>,
60    /// The name of the column.
61    pub column_name: ColumnName,
62    /// The expressions from which this scope item is derived. Used by `GROUP
63    /// BY` and window functions.
64    pub exprs: BTreeSet<Expr<Aug>>,
65    /// Whether the column is the return value of a function that produces only
66    /// a single column. This accounts for a strange PostgreSQL special case
67    /// around whole-row expansion.
68    pub from_single_column_function: bool,
69    /// Controls whether the column is only accessible via a table-qualified
70    /// reference. When false, the scope item is also excluded from `SELECT *`.
71    ///
72    /// This should be true for almost all scope items. It is set to false for
73    /// join columns in USING constraints. For example, in `t1 FULL JOIN t2
74    /// USING a`, `t1.a` and `t2.a` are still available by fully-qualified
75    /// reference, but a bare `a` refers to a new column whose value is
76    /// `coalesce(t1.a, t2.a)`. This is a big special case because normally
77    /// having three columns in scope named `a` would result in "ambiguous
78    /// column reference" errors.
79    pub allow_unqualified_references: bool,
80    /// If set, any attempt to reference this item will return the error
81    /// produced by this function.
82    ///
83    /// The function is provided with the table and column name in the
84    /// reference. It should return a `PlanError` describing why the reference
85    /// is invalid.
86    ///
87    /// This is useful for preventing access to certain columns in specific
88    /// contexts, like columns that are on the wrong side of a `LATERAL` join.
89    pub error_if_referenced: Option<fn(Option<&PartialItemName>, &ColumnName) -> PlanError>,
90    /// For table functions in scalar positions, this flag is true for the
91    /// ordinality column. If true, then this column represents an "exists" flag
92    /// for the entire row of the table function. In that case, this column must
93    /// be excluded from `*` expansion. If the corresponding datum is `NULL`, then
94    /// `*` expansion should yield a single `NULL` instead of a record with various
95    /// datums.
96    pub is_exists_column_for_a_table_function_that_was_in_the_target_list: bool,
97    // Force use of the constructor methods.
98    _private: (),
99}
100
101/// An ungrouped column in a scope.
102///
103/// We can't simply drop these items from scope. These items need to *exist*
104/// because they might shadow variables in outer scopes that would otherwise be
105/// valid to reference, but accessing them needs to produce an error.
106#[derive(Debug, Clone)]
107pub struct ScopeUngroupedColumn {
108    /// The name of the table that produced this ungrouped column, if any.
109    pub table_name: Option<PartialItemName>,
110    /// The name of the ungrouped column.
111    pub column_name: ColumnName,
112    /// Whether the original scope item allowed unqualified references.
113    pub allow_unqualified_references: bool,
114}
115
116#[derive(Debug, Clone)]
117pub struct Scope {
118    /// The items in this scope.
119    pub items: Vec<ScopeItem>,
120    /// The ungrouped columns in the scope.
121    pub ungrouped_columns: Vec<ScopeUngroupedColumn>,
122    // Whether this scope starts a new chain of lateral outer scopes.
123    //
124    // It's easiest to understand with an example. Consider this query:
125    //
126    //     SELECT (SELECT * FROM tab1, tab2, (SELECT tab1.a, tab3.a)) FROM tab3, tab4
127    //     Scope 1:                          ------------------------
128    //     Scope 2:                    ----
129    //     Scope 3:              ----
130    //     Scope 4:                                                              ----
131    //     Scope 5:                                                        ----
132    //
133    // Note that the because the derived table is not marked `LATERAL`, its
134    // reference to `tab3.a` is valid but its reference to `tab1.a` is not.
135    //
136    // Scope 5 is the parent of scope 4, scope 4 is the parent of scope 3, and
137    // so on. The non-lateral derived table is not allowed to access scopes 2
138    // and 3, because they are part of the same lateral chain, but it *is*
139    // allowed to access scope 4 and 5. So, to capture this information, we set
140    // `lateral_barrier: true` for scope 4.
141    pub lateral_barrier: bool,
142}
143
144impl ScopeItem {
145    pub fn empty() -> ScopeItem {
146        ScopeItem {
147            table_name: None,
148            column_name: "?column?".into(),
149            exprs: BTreeSet::new(),
150            from_single_column_function: false,
151            allow_unqualified_references: true,
152            error_if_referenced: None,
153            is_exists_column_for_a_table_function_that_was_in_the_target_list: false,
154            _private: (),
155        }
156    }
157
158    /// Constructs a new scope item from an unqualified column name.
159    pub fn from_column_name<N>(column_name: N) -> ScopeItem
160    where
161        N: Into<ColumnName>,
162    {
163        ScopeItem::from_name(None, column_name.into())
164    }
165
166    /// Constructs a new scope item from a name.
167    pub fn from_name<N>(table_name: Option<PartialItemName>, column_name: N) -> ScopeItem
168    where
169        N: Into<ColumnName>,
170    {
171        let mut item = ScopeItem::empty();
172        item.table_name = table_name;
173        item.column_name = column_name.into();
174        item
175    }
176
177    /// Constructs a new scope item with no name from an expression.
178    pub fn from_expr(expr: impl Into<Option<Expr<Aug>>>) -> ScopeItem {
179        let mut item = ScopeItem::empty();
180        if let Some(expr) = expr.into() {
181            item.exprs.insert(expr);
182        }
183        item
184    }
185
186    pub fn is_from_table(&self, table_name: &PartialItemName) -> bool {
187        match &self.table_name {
188            None => false,
189            Some(n) => n.matches(table_name),
190        }
191    }
192}
193
194impl Scope {
195    pub fn empty() -> Self {
196        Scope {
197            items: vec![],
198            ungrouped_columns: vec![],
199            lateral_barrier: false,
200        }
201    }
202
203    pub fn from_source<I, N>(table_name: Option<PartialItemName>, column_names: I) -> Self
204    where
205        I: IntoIterator<Item = N>,
206        N: Into<ColumnName>,
207    {
208        let mut scope = Scope::empty();
209        scope.items = column_names
210            .into_iter()
211            .map(|column_name| ScopeItem::from_name(table_name.clone(), column_name.into()))
212            .collect();
213        scope
214    }
215
216    /// Constructs an iterator over the canonical name for each column.
217    pub fn column_names(&self) -> impl Iterator<Item = &ColumnName> {
218        self.items.iter().map(|item| &item.column_name)
219    }
220
221    pub fn len(&self) -> usize {
222        self.items.len()
223    }
224
225    /// Iterates over all items in the scope.
226    ///
227    /// Items are returned in order of preference, where the innermost scope has
228    /// the highest preference. For example, given scopes `A(B(C))`, items are
229    /// presented in the order `C`, `B`, `A`.
230    ///
231    /// Items are returned alongside the column reference that targets that item
232    /// and the item's "lateral level". The latter bears explaining. The lateral
233    /// level is the number of lateral barriers between this scope and the item.
234    /// See `Scope::lateral_barrier` for a diagram. Roughly speaking, items from
235    /// different levels but the same lateral level are items from different
236    /// joins in the same subquery, while items in different lateral levels are
237    /// items from different queries entirely. Rules about ambiguity apply
238    /// within an entire lateral level, not just within a single scope level.
239    ///
240    /// NOTE(benesch): Scope` really shows its weaknesses here. Ideally we'd
241    /// have separate types like `QueryScope` and `JoinScope` that more
242    /// naturally encode the concept of a "lateral level", or at least something
243    /// along those lines.
244    fn all_items<'a>(
245        &'a self,
246        outer_scopes: &'a [Scope],
247    ) -> impl Iterator<Item = ScopeCursor<'a>> + 'a {
248        let mut lat_level = 0;
249        iter::once(self)
250            .chain(outer_scopes)
251            .enumerate()
252            .flat_map(move |(level, scope)| {
253                if scope.lateral_barrier {
254                    lat_level += 1;
255                }
256                let items = scope
257                    .items
258                    .iter()
259                    .enumerate()
260                    .map(move |(column, item)| ScopeCursor {
261                        lat_level,
262                        inner: ScopeCursorInner::Item {
263                            column: ColumnRef { level, column },
264                            item,
265                        },
266                    });
267                let ungrouped_columns = scope.ungrouped_columns.iter().map(move |uc| ScopeCursor {
268                    lat_level,
269                    inner: ScopeCursorInner::UngroupedColumn(uc),
270                });
271                items.chain(ungrouped_columns)
272            })
273    }
274
275    /// Returns all items from the given table name in the closest scope.
276    ///
277    /// If no tables with the given name are in scope, returns an empty
278    /// iterator.
279    ///
280    /// NOTE(benesch): This is wrong for zero-arity relations, because we can't
281    /// distinguish between "no such table" and a table that exists but has no
282    /// columns. The current design of scope makes this difficult to fix,
283    /// unfortunately.
284    pub fn items_from_table<'a>(
285        &'a self,
286        outer_scopes: &'a [Scope],
287        table: &PartialItemName,
288    ) -> Result<Vec<(ColumnRef, &'a ScopeItem)>, PlanError> {
289        let mut seen_level = None;
290        let items: Vec<_> = self
291            .all_items(outer_scopes)
292            .filter_map(move |c| match c.inner {
293                ScopeCursorInner::Item { column, item }
294                    if item.is_from_table(table)
295                        && *seen_level.get_or_insert(c.lat_level) == c.lat_level =>
296                {
297                    Some((column, item))
298                }
299                _ => None,
300            })
301            .collect();
302        if !items.iter().map(|(column, _)| column.level).all_equal() {
303            return Err(PlanError::AmbiguousTable(table.clone()));
304        }
305        Ok(items)
306    }
307
308    /// Returns a matching [`ColumnRef`], if one exists.
309    ///
310    /// Filters all visible items against the provided `matches` closure, and then matches this
311    /// filtered set against the provided `column_name`.
312    fn resolve_internal<'a, M>(
313        &'a self,
314        outer_scopes: &[Scope],
315        mut matches: M,
316        table_name: Option<&PartialItemName>,
317        column_name: &ColumnName,
318    ) -> Result<ColumnRef, PlanError>
319    where
320        M: FnMut(&ScopeCursor) -> bool,
321    {
322        let mut results = self
323            .all_items(outer_scopes)
324            .filter(|c| (matches)(c) && c.column_name() == column_name);
325        match results.next() {
326            None => {
327                let similar = self
328                    .all_items(outer_scopes)
329                    .filter(|c| (matches)(c))
330                    .filter_map(|c| {
331                        c.column_name()
332                            .is_similar(column_name)
333                            .then(|| c.column_name().clone())
334                    })
335                    .collect();
336                Err(PlanError::UnknownColumn {
337                    table: table_name.cloned(),
338                    column: column_name.clone(),
339                    similar,
340                })
341            }
342            Some(c) => {
343                if let Some(ambiguous) = results.find(|c2| c.lat_level == c2.lat_level) {
344                    if let Some(table_name) = table_name {
345                        if let (
346                            ScopeCursorInner::Item {
347                                column: ColumnRef { level: c_level, .. },
348                                ..
349                            },
350                            ScopeCursorInner::Item {
351                                column:
352                                    ColumnRef {
353                                        level: ambiguous_level,
354                                        ..
355                                    },
356                                ..
357                            },
358                        ) = (c.inner, ambiguous.inner)
359                        {
360                            // ColumnRefs with identical levels indicate multiple columns of the
361                            // same name in relation. If the levels differ then it is instead two
362                            // tables with the same name, both having a column with this name.
363                            if c_level == ambiguous_level {
364                                return Err(PlanError::AmbiguousColumn(column_name.clone()));
365                            }
366                        }
367                        return Err(PlanError::AmbiguousTable(table_name.clone()));
368                    } else {
369                        return Err(PlanError::AmbiguousColumn(column_name.clone()));
370                    }
371                }
372
373                match c.inner {
374                    ScopeCursorInner::UngroupedColumn(uc) => Err(PlanError::UngroupedColumn {
375                        table: uc.table_name.clone(),
376                        column: uc.column_name.clone(),
377                    }),
378                    ScopeCursorInner::Item { column, item } => {
379                        if let Some(error_if_referenced) = item.error_if_referenced {
380                            return Err(error_if_referenced(table_name, column_name));
381                        }
382                        Ok(column)
383                    }
384                }
385            }
386        }
387    }
388
389    /// Resolves references to a column name to a single column, or errors if
390    /// multiple columns are equally valid references.
391    pub fn resolve_column<'a>(
392        &'a self,
393        outer_scopes: &[Scope],
394        column_name: &ColumnName,
395    ) -> Result<ColumnRef, PlanError> {
396        let table_name = None;
397        self.resolve_internal(
398            outer_scopes,
399            |c| c.allow_unqualified_references(),
400            table_name,
401            column_name,
402        )
403    }
404
405    /// Resolves a column name in a `USING` clause.
406    pub fn resolve_using_column(
407        &self,
408        column_name: &ColumnName,
409        join_side: JoinSide,
410    ) -> Result<ColumnRef, PlanError> {
411        self.resolve_column(&[], column_name).map_err(|e| match e {
412            // Attach a bit more context to unknown and ambiguous column
413            // errors to match PostgreSQL.
414            PlanError::AmbiguousColumn(column) => {
415                PlanError::AmbiguousColumnInUsingClause { column, join_side }
416            }
417            PlanError::UnknownColumn { column, .. } => {
418                PlanError::UnknownColumnInUsingClause { column, join_side }
419            }
420            _ => e,
421        })
422    }
423
424    pub fn resolve_table_column<'a>(
425        &'a self,
426        outer_scopes: &[Scope],
427        table_name: &PartialItemName,
428        column_name: &ColumnName,
429    ) -> Result<ColumnRef, PlanError> {
430        let mut seen_at_level = None;
431        self.resolve_internal(
432            outer_scopes,
433            |c| {
434                // Once we've matched a table name at a lateral level, even if
435                // the column name did not match, we can never match an item
436                // from another lateral level.
437                if let Some(seen_at_level) = seen_at_level {
438                    if seen_at_level != c.lat_level {
439                        return false;
440                    }
441                }
442                if c.table_name().as_ref().map(|n| n.matches(table_name)) == Some(true) {
443                    seen_at_level = Some(c.lat_level);
444                    true
445                } else {
446                    false
447                }
448            },
449            Some(table_name),
450            column_name,
451        )
452    }
453
454    pub fn resolve<'a>(
455        &'a self,
456        outer_scopes: &[Scope],
457        table_name: Option<&PartialItemName>,
458        column_name: &ColumnName,
459    ) -> Result<ColumnRef, PlanError> {
460        match table_name {
461            None => self.resolve_column(outer_scopes, column_name),
462            Some(table_name) => self.resolve_table_column(outer_scopes, table_name, column_name),
463        }
464    }
465
466    /// Look to see if there is an already-calculated instance of this expr.
467    /// Failing to find one is not an error, so this just returns Option
468    pub fn resolve_expr<'a>(&'a self, expr: &Expr<Aug>) -> Option<ColumnRef> {
469        // Literal values should not be treated as "cached" because their types
470        // in scope will have already been determined, but the type of the
471        // reoccurence of the expr might want to have a different type.
472        //
473        // This is most evident in the case of literal `NULL` values. The first
474        // occurrence is likely to be cast as `ScalarType::String`, but
475        // subsequent `NULL` values should be untyped.
476        if matches!(expr, Expr::Value(_)) {
477            return None;
478        }
479
480        self.items
481            .iter()
482            .enumerate()
483            .find(|(_, item)| item.exprs.contains(expr))
484            .map(|(i, _)| ColumnRef {
485                level: 0,
486                column: i,
487            })
488    }
489
490    pub fn product(self, right: Self) -> Result<Self, PlanError> {
491        let mut l_tables = self.table_names().into_iter().collect::<Vec<_>>();
492        // Make ordering deterministic for testing
493        l_tables.sort_by(|l, r| l.item.cmp(&r.item));
494        let r_tables = right.table_names();
495        for l in l_tables {
496            for r in &r_tables {
497                if l.matches(r) {
498                    sql_bail!("table name \"{}\" specified more than once", l.item)
499                }
500            }
501        }
502        Ok(Scope {
503            items: self.items.into_iter().chain(right.items).collect(),
504            ungrouped_columns: vec![],
505            lateral_barrier: false,
506        })
507    }
508
509    pub fn project(&self, columns: &[usize]) -> Self {
510        Scope {
511            items: columns.iter().map(|&i| self.items[i].clone()).collect(),
512            ungrouped_columns: vec![],
513            lateral_barrier: false,
514        }
515    }
516
517    pub fn table_names(&self) -> BTreeSet<&PartialItemName> {
518        self.items
519            .iter()
520            .filter_map(|name| name.table_name.as_ref())
521            .collect::<BTreeSet<&PartialItemName>>()
522    }
523}
524
525/// A pointer to a scope item or an ungrouped column along side its lateral
526/// level. Used internally while iterating.
527#[derive(Debug, Clone)]
528struct ScopeCursor<'a> {
529    lat_level: usize,
530    inner: ScopeCursorInner<'a>,
531}
532
533#[derive(Debug, Clone)]
534enum ScopeCursorInner<'a> {
535    Item {
536        column: ColumnRef,
537        item: &'a ScopeItem,
538    },
539    UngroupedColumn(&'a ScopeUngroupedColumn),
540}
541
542impl ScopeCursor<'_> {
543    fn table_name(&self) -> Option<&PartialItemName> {
544        match &self.inner {
545            ScopeCursorInner::Item { item, .. } => item.table_name.as_ref(),
546            ScopeCursorInner::UngroupedColumn(uc) => uc.table_name.as_ref(),
547        }
548    }
549
550    fn column_name(&self) -> &ColumnName {
551        match &self.inner {
552            ScopeCursorInner::Item { item, .. } => &item.column_name,
553            ScopeCursorInner::UngroupedColumn(uc) => &uc.column_name,
554        }
555    }
556
557    fn allow_unqualified_references(&self) -> bool {
558        match &self.inner {
559            ScopeCursorInner::Item { item, .. } => item.allow_unqualified_references,
560            ScopeCursorInner::UngroupedColumn(uc) => uc.allow_unqualified_references,
561        }
562    }
563}