mz_sql/plan/scope.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Handles SQL's scoping rules.
11//!
12//! A scope spans a single SQL `Query`. Nested subqueries create new scopes.
13//! Names are resolved against the innermost scope first.
14//! * If a match is found, it is returned.
15//! * If no matches are found, the name is resolved against the parent scope.
16//! * If multiple matches are found, the name is ambiguous and we return an
17//! error to the user.
18//!
19//! Matching rules:
20//! * `bar` will match any column in the scope named `bar`
21//! * `foo.bar` will match any column in the scope named `bar` that originated
22//! from a table named `foo`.
23//! * Table aliases such as `foo AS quux` replace the old table name.
24//! * Functions create unnamed columns, which can be named with columns aliases
25//! `(bar + 1) as more_bar`.
26//!
27//! Additionally, most databases fold some form of CSE into name resolution so
28//! that eg `SELECT sum(x) FROM foo GROUP BY sum(x)` would be treated something
29//! like `SELECT "sum(x)" FROM foo GROUP BY sum(x) AS "sum(x)"` rather than
30//! failing to resolve `x`. We handle this by including the underlying
31//! `sql_parser::ast::Expr` in cases where this is possible.
32//!
33//! Many SQL expressions do strange and arbitrary things to scopes. Rather than
34//! try to capture them all here, we just expose the internals of `Scope` and
35//! handle it in the appropriate place in `super::query`.
36//!
37//! NOTE(benesch): The above approach of exposing scope's internals to the
38//! entire planner has not aged well. SQL scopes are now full of undocumented
39//! assumptions and requirements, since various subcomponents of the planner
40//! shove data into scope items to communicate with subcomponents a mile away.
41//! I've tried to refactor this code several times to no avail. It works better
42//! than you might expect. But you have been warned. Tread carefully!
43
44use std::collections::BTreeSet;
45use std::iter;
46
47use mz_ore::iter::IteratorExt;
48use mz_repr::ColumnName;
49
50use crate::ast::Expr;
51use crate::names::{Aug, PartialItemName};
52use crate::plan::error::PlanError;
53use crate::plan::hir::ColumnRef;
54use crate::plan::plan_utils::JoinSide;
55
56#[derive(Debug, Clone)]
57pub struct ScopeItem {
58 /// The name of the table that produced this scope item, if any.
59 pub table_name: Option<PartialItemName>,
60 /// The name of the column.
61 pub column_name: ColumnName,
62 /// The expressions from which this scope item is derived. Used by `GROUP
63 /// BY` and window functions.
64 pub exprs: BTreeSet<Expr<Aug>>,
65 /// Whether the column is the return value of a function that produces only
66 /// a single column. This accounts for a strange PostgreSQL special case
67 /// around whole-row expansion.
68 pub from_single_column_function: bool,
69 /// Controls whether the column is only accessible via a table-qualified
70 /// reference. When false, the scope item is also excluded from `SELECT *`.
71 ///
72 /// This should be true for almost all scope items. It is set to false for
73 /// join columns in USING constraints. For example, in `t1 FULL JOIN t2
74 /// USING a`, `t1.a` and `t2.a` are still available by fully-qualified
75 /// reference, but a bare `a` refers to a new column whose value is
76 /// `coalesce(t1.a, t2.a)`. This is a big special case because normally
77 /// having three columns in scope named `a` would result in "ambiguous
78 /// column reference" errors.
79 pub allow_unqualified_references: bool,
80 /// If set, any attempt to reference this item will return the error
81 /// produced by this function.
82 ///
83 /// The function is provided with the table and column name in the
84 /// reference. It should return a `PlanError` describing why the reference
85 /// is invalid.
86 ///
87 /// This is useful for preventing access to certain columns in specific
88 /// contexts, like columns that are on the wrong side of a `LATERAL` join.
89 pub error_if_referenced: Option<fn(Option<&PartialItemName>, &ColumnName) -> PlanError>,
90 /// For table functions in scalar positions, this flag is true for the
91 /// ordinality column. If true, then this column represents an "exists" flag
92 /// for the entire row of the table function. In that case, this column must
93 /// be excluded from `*` expansion. If the corresponding datum is `NULL`, then
94 /// `*` expansion should yield a single `NULL` instead of a record with various
95 /// datums.
96 pub is_exists_column_for_a_table_function_that_was_in_the_target_list: bool,
97 // Force use of the constructor methods.
98 _private: (),
99}
100
101/// An ungrouped column in a scope.
102///
103/// We can't simply drop these items from scope. These items need to *exist*
104/// because they might shadow variables in outer scopes that would otherwise be
105/// valid to reference, but accessing them needs to produce an error.
106#[derive(Debug, Clone)]
107pub struct ScopeUngroupedColumn {
108 /// The name of the table that produced this ungrouped column, if any.
109 pub table_name: Option<PartialItemName>,
110 /// The name of the ungrouped column.
111 pub column_name: ColumnName,
112 /// Whether the original scope item allowed unqualified references.
113 pub allow_unqualified_references: bool,
114}
115
116#[derive(Debug, Clone)]
117pub struct Scope {
118 /// The items in this scope.
119 pub items: Vec<ScopeItem>,
120 /// The ungrouped columns in the scope.
121 pub ungrouped_columns: Vec<ScopeUngroupedColumn>,
122 // Whether this scope starts a new chain of lateral outer scopes.
123 //
124 // It's easiest to understand with an example. Consider this query:
125 //
126 // SELECT (SELECT * FROM tab1, tab2, (SELECT tab1.a, tab3.a)) FROM tab3, tab4
127 // Scope 1: ------------------------
128 // Scope 2: ----
129 // Scope 3: ----
130 // Scope 4: ----
131 // Scope 5: ----
132 //
133 // Note that the because the derived table is not marked `LATERAL`, its
134 // reference to `tab3.a` is valid but its reference to `tab1.a` is not.
135 //
136 // Scope 5 is the parent of scope 4, scope 4 is the parent of scope 3, and
137 // so on. The non-lateral derived table is not allowed to access scopes 2
138 // and 3, because they are part of the same lateral chain, but it *is*
139 // allowed to access scope 4 and 5. So, to capture this information, we set
140 // `lateral_barrier: true` for scope 4.
141 pub lateral_barrier: bool,
142}
143
144impl ScopeItem {
145 pub fn empty() -> ScopeItem {
146 ScopeItem {
147 table_name: None,
148 column_name: "?column?".into(),
149 exprs: BTreeSet::new(),
150 from_single_column_function: false,
151 allow_unqualified_references: true,
152 error_if_referenced: None,
153 is_exists_column_for_a_table_function_that_was_in_the_target_list: false,
154 _private: (),
155 }
156 }
157
158 /// Constructs a new scope item from an unqualified column name.
159 pub fn from_column_name<N>(column_name: N) -> ScopeItem
160 where
161 N: Into<ColumnName>,
162 {
163 ScopeItem::from_name(None, column_name.into())
164 }
165
166 /// Constructs a new scope item from a name.
167 pub fn from_name<N>(table_name: Option<PartialItemName>, column_name: N) -> ScopeItem
168 where
169 N: Into<ColumnName>,
170 {
171 let mut item = ScopeItem::empty();
172 item.table_name = table_name;
173 item.column_name = column_name.into();
174 item
175 }
176
177 /// Constructs a new scope item with no name from an expression.
178 pub fn from_expr(expr: impl Into<Option<Expr<Aug>>>) -> ScopeItem {
179 let mut item = ScopeItem::empty();
180 if let Some(expr) = expr.into() {
181 item.exprs.insert(expr);
182 }
183 item
184 }
185
186 pub fn is_from_table(&self, table_name: &PartialItemName) -> bool {
187 match &self.table_name {
188 None => false,
189 Some(n) => n.matches(table_name),
190 }
191 }
192}
193
194impl Scope {
195 pub fn empty() -> Self {
196 Scope {
197 items: vec![],
198 ungrouped_columns: vec![],
199 lateral_barrier: false,
200 }
201 }
202
203 pub fn from_source<I, N>(table_name: Option<PartialItemName>, column_names: I) -> Self
204 where
205 I: IntoIterator<Item = N>,
206 N: Into<ColumnName>,
207 {
208 let mut scope = Scope::empty();
209 scope.items = column_names
210 .into_iter()
211 .map(|column_name| ScopeItem::from_name(table_name.clone(), column_name.into()))
212 .collect();
213 scope
214 }
215
216 /// Constructs an iterator over the canonical name for each column.
217 pub fn column_names(&self) -> impl Iterator<Item = &ColumnName> {
218 self.items.iter().map(|item| &item.column_name)
219 }
220
221 pub fn len(&self) -> usize {
222 self.items.len()
223 }
224
225 /// Iterates over all items in the scope.
226 ///
227 /// Items are returned in order of preference, where the innermost scope has
228 /// the highest preference. For example, given scopes `A(B(C))`, items are
229 /// presented in the order `C`, `B`, `A`.
230 ///
231 /// Items are returned alongside the column reference that targets that item
232 /// and the item's "lateral level". The latter bears explaining. The lateral
233 /// level is the number of lateral barriers between this scope and the item.
234 /// See `Scope::lateral_barrier` for a diagram. Roughly speaking, items from
235 /// different levels but the same lateral level are items from different
236 /// joins in the same subquery, while items in different lateral levels are
237 /// items from different queries entirely. Rules about ambiguity apply
238 /// within an entire lateral level, not just within a single scope level.
239 ///
240 /// NOTE(benesch): Scope` really shows its weaknesses here. Ideally we'd
241 /// have separate types like `QueryScope` and `JoinScope` that more
242 /// naturally encode the concept of a "lateral level", or at least something
243 /// along those lines.
244 fn all_items<'a>(
245 &'a self,
246 outer_scopes: &'a [Scope],
247 ) -> impl Iterator<Item = ScopeCursor<'a>> + 'a {
248 let mut lat_level = 0;
249 iter::once(self)
250 .chain(outer_scopes)
251 .enumerate()
252 .flat_map(move |(level, scope)| {
253 if scope.lateral_barrier {
254 lat_level += 1;
255 }
256 let items = scope
257 .items
258 .iter()
259 .enumerate()
260 .map(move |(column, item)| ScopeCursor {
261 lat_level,
262 inner: ScopeCursorInner::Item {
263 column: ColumnRef { level, column },
264 item,
265 },
266 });
267 let ungrouped_columns = scope.ungrouped_columns.iter().map(move |uc| ScopeCursor {
268 lat_level,
269 inner: ScopeCursorInner::UngroupedColumn(uc),
270 });
271 items.chain(ungrouped_columns)
272 })
273 }
274
275 /// Returns all items from the given table name in the closest scope.
276 ///
277 /// If no tables with the given name are in scope, returns an empty
278 /// iterator.
279 ///
280 /// NOTE(benesch): This is wrong for zero-arity relations, because we can't
281 /// distinguish between "no such table" and a table that exists but has no
282 /// columns. The current design of scope makes this difficult to fix,
283 /// unfortunately.
284 pub fn items_from_table<'a>(
285 &'a self,
286 outer_scopes: &'a [Scope],
287 table: &PartialItemName,
288 ) -> Result<Vec<(ColumnRef, &'a ScopeItem)>, PlanError> {
289 let mut seen_level = None;
290 let items: Vec<_> = self
291 .all_items(outer_scopes)
292 .filter_map(move |c| match c.inner {
293 ScopeCursorInner::Item { column, item }
294 if item.is_from_table(table)
295 && *seen_level.get_or_insert(c.lat_level) == c.lat_level =>
296 {
297 Some((column, item))
298 }
299 _ => None,
300 })
301 .collect();
302 if !items.iter().map(|(column, _)| column.level).all_equal() {
303 return Err(PlanError::AmbiguousTable(table.clone()));
304 }
305 Ok(items)
306 }
307
308 /// Returns a matching [`ColumnRef`], if one exists.
309 ///
310 /// Filters all visible items against the provided `matches` closure, and then matches this
311 /// filtered set against the provided `column_name`.
312 fn resolve_internal<'a, M>(
313 &'a self,
314 outer_scopes: &[Scope],
315 mut matches: M,
316 table_name: Option<&PartialItemName>,
317 column_name: &ColumnName,
318 ) -> Result<ColumnRef, PlanError>
319 where
320 M: FnMut(&ScopeCursor) -> bool,
321 {
322 let mut results = self
323 .all_items(outer_scopes)
324 .filter(|c| (matches)(c) && c.column_name() == column_name);
325 match results.next() {
326 None => {
327 let similar = self
328 .all_items(outer_scopes)
329 .filter(|c| (matches)(c))
330 .filter_map(|c| {
331 c.column_name()
332 .is_similar(column_name)
333 .then(|| c.column_name().clone())
334 })
335 .collect();
336 Err(PlanError::UnknownColumn {
337 table: table_name.cloned(),
338 column: column_name.clone(),
339 similar,
340 })
341 }
342 Some(c) => {
343 if let Some(ambiguous) = results.find(|c2| c.lat_level == c2.lat_level) {
344 if let Some(table_name) = table_name {
345 if let (
346 ScopeCursorInner::Item {
347 column: ColumnRef { level: c_level, .. },
348 ..
349 },
350 ScopeCursorInner::Item {
351 column:
352 ColumnRef {
353 level: ambiguous_level,
354 ..
355 },
356 ..
357 },
358 ) = (c.inner, ambiguous.inner)
359 {
360 // ColumnRefs with identical levels indicate multiple columns of the
361 // same name in relation. If the levels differ then it is instead two
362 // tables with the same name, both having a column with this name.
363 if c_level == ambiguous_level {
364 return Err(PlanError::AmbiguousColumn(column_name.clone()));
365 }
366 }
367 return Err(PlanError::AmbiguousTable(table_name.clone()));
368 } else {
369 return Err(PlanError::AmbiguousColumn(column_name.clone()));
370 }
371 }
372
373 match c.inner {
374 ScopeCursorInner::UngroupedColumn(uc) => Err(PlanError::UngroupedColumn {
375 table: uc.table_name.clone(),
376 column: uc.column_name.clone(),
377 }),
378 ScopeCursorInner::Item { column, item } => {
379 if let Some(error_if_referenced) = item.error_if_referenced {
380 return Err(error_if_referenced(table_name, column_name));
381 }
382 Ok(column)
383 }
384 }
385 }
386 }
387 }
388
389 /// Resolves references to a column name to a single column, or errors if
390 /// multiple columns are equally valid references.
391 pub fn resolve_column<'a>(
392 &'a self,
393 outer_scopes: &[Scope],
394 column_name: &ColumnName,
395 ) -> Result<ColumnRef, PlanError> {
396 let table_name = None;
397 self.resolve_internal(
398 outer_scopes,
399 |c| c.allow_unqualified_references(),
400 table_name,
401 column_name,
402 )
403 }
404
405 /// Resolves a column name in a `USING` clause.
406 pub fn resolve_using_column(
407 &self,
408 column_name: &ColumnName,
409 join_side: JoinSide,
410 ) -> Result<ColumnRef, PlanError> {
411 self.resolve_column(&[], column_name).map_err(|e| match e {
412 // Attach a bit more context to unknown and ambiguous column
413 // errors to match PostgreSQL.
414 PlanError::AmbiguousColumn(column) => {
415 PlanError::AmbiguousColumnInUsingClause { column, join_side }
416 }
417 PlanError::UnknownColumn { column, .. } => {
418 PlanError::UnknownColumnInUsingClause { column, join_side }
419 }
420 _ => e,
421 })
422 }
423
424 pub fn resolve_table_column<'a>(
425 &'a self,
426 outer_scopes: &[Scope],
427 table_name: &PartialItemName,
428 column_name: &ColumnName,
429 ) -> Result<ColumnRef, PlanError> {
430 let mut seen_at_level = None;
431 self.resolve_internal(
432 outer_scopes,
433 |c| {
434 // Once we've matched a table name at a lateral level, even if
435 // the column name did not match, we can never match an item
436 // from another lateral level.
437 if let Some(seen_at_level) = seen_at_level {
438 if seen_at_level != c.lat_level {
439 return false;
440 }
441 }
442 if c.table_name().as_ref().map(|n| n.matches(table_name)) == Some(true) {
443 seen_at_level = Some(c.lat_level);
444 true
445 } else {
446 false
447 }
448 },
449 Some(table_name),
450 column_name,
451 )
452 }
453
454 pub fn resolve<'a>(
455 &'a self,
456 outer_scopes: &[Scope],
457 table_name: Option<&PartialItemName>,
458 column_name: &ColumnName,
459 ) -> Result<ColumnRef, PlanError> {
460 match table_name {
461 None => self.resolve_column(outer_scopes, column_name),
462 Some(table_name) => self.resolve_table_column(outer_scopes, table_name, column_name),
463 }
464 }
465
466 /// Look to see if there is an already-calculated instance of this expr.
467 /// Failing to find one is not an error, so this just returns Option
468 pub fn resolve_expr<'a>(&'a self, expr: &Expr<Aug>) -> Option<ColumnRef> {
469 // Literal values should not be treated as "cached" because their types
470 // in scope will have already been determined, but the type of the
471 // reoccurence of the expr might want to have a different type.
472 //
473 // This is most evident in the case of literal `NULL` values. The first
474 // occurrence is likely to be cast as `ScalarType::String`, but
475 // subsequent `NULL` values should be untyped.
476 if matches!(expr, Expr::Value(_)) {
477 return None;
478 }
479
480 self.items
481 .iter()
482 .enumerate()
483 .find(|(_, item)| item.exprs.contains(expr))
484 .map(|(i, _)| ColumnRef {
485 level: 0,
486 column: i,
487 })
488 }
489
490 pub fn product(self, right: Self) -> Result<Self, PlanError> {
491 let mut l_tables = self.table_names().into_iter().collect::<Vec<_>>();
492 // Make ordering deterministic for testing
493 l_tables.sort_by(|l, r| l.item.cmp(&r.item));
494 let r_tables = right.table_names();
495 for l in l_tables {
496 for r in &r_tables {
497 if l.matches(r) {
498 sql_bail!("table name \"{}\" specified more than once", l.item)
499 }
500 }
501 }
502 Ok(Scope {
503 items: self.items.into_iter().chain(right.items).collect(),
504 ungrouped_columns: vec![],
505 lateral_barrier: false,
506 })
507 }
508
509 pub fn project(&self, columns: &[usize]) -> Self {
510 Scope {
511 items: columns.iter().map(|&i| self.items[i].clone()).collect(),
512 ungrouped_columns: vec![],
513 lateral_barrier: false,
514 }
515 }
516
517 pub fn table_names(&self) -> BTreeSet<&PartialItemName> {
518 self.items
519 .iter()
520 .filter_map(|name| name.table_name.as_ref())
521 .collect::<BTreeSet<&PartialItemName>>()
522 }
523}
524
525/// A pointer to a scope item or an ungrouped column along side its lateral
526/// level. Used internally while iterating.
527#[derive(Debug, Clone)]
528struct ScopeCursor<'a> {
529 lat_level: usize,
530 inner: ScopeCursorInner<'a>,
531}
532
533#[derive(Debug, Clone)]
534enum ScopeCursorInner<'a> {
535 Item {
536 column: ColumnRef,
537 item: &'a ScopeItem,
538 },
539 UngroupedColumn(&'a ScopeUngroupedColumn),
540}
541
542impl ScopeCursor<'_> {
543 fn table_name(&self) -> Option<&PartialItemName> {
544 match &self.inner {
545 ScopeCursorInner::Item { item, .. } => item.table_name.as_ref(),
546 ScopeCursorInner::UngroupedColumn(uc) => uc.table_name.as_ref(),
547 }
548 }
549
550 fn column_name(&self) -> &ColumnName {
551 match &self.inner {
552 ScopeCursorInner::Item { item, .. } => &item.column_name,
553 ScopeCursorInner::UngroupedColumn(uc) => &uc.column_name,
554 }
555 }
556
557 fn allow_unqualified_references(&self) -> bool {
558 match &self.inner {
559 ScopeCursorInner::Item { item, .. } => item.allow_unqualified_references,
560 ScopeCursorInner::UngroupedColumn(uc) => uc.allow_unqualified_references,
561 }
562 }
563}