Skip to main content

mz_deploy/project/resolve/
cte_scope.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! CTE scope tracking for SQL AST visitors.
11//!
12//! Common Table Expressions (CTEs) introduce names that shadow database objects.
13//! When traversing a SQL AST to collect dependencies, extract aliases, or
14//! transform names, CTE references must be distinguished from real object
15//! references. This module provides [`CteScope`], a stack-based tracker that
16//! manages CTE name visibility across nested queries.
17//!
18//! ## Scoping Rules
19//!
20//! - **Simple CTEs** (`WITH a AS (...), b AS (...) SELECT ...`): names are
21//!   introduced incrementally, left to right. Each simple CTE's body sees only
22//!   the CTEs declared *before* it; the name of CTE `i` is not in scope while
23//!   visiting CTE `i`'s own body. All names are visible to the main query body.
24//!   This mirrors Materialize's name resolver (`fold_query` in
25//!   `src/sql/src/names.rs`, `plan_ctes` in `src/sql/src/plan/query.rs`), which
26//!   resolves each `CteBlock::Simple` body before inserting that CTE's name. As
27//!   a result a simple CTE whose name shadows a catalog object can still
28//!   reference that object inside its own definition, e.g.
29//!   `WITH products AS (SELECT * FROM products WHERE active) SELECT * FROM products`
30//!   — the inner `products` is the catalog table, the outer is the CTE. Callers
31//!   drive this by pushing an empty scope, then visiting each body and calling
32//!   [`insert_current`](CteScope::insert_current) after each.
33//!
34//! - **Mutually Recursive CTEs** (`WITH MUTUALLY RECURSIVE a AS (...), b AS (...)
35//!   SELECT ...`): All CTE names are visible to all CTE definitions and the main
36//!   query body. Self-references and forward references are valid, so all names
37//!   are pushed up front.
38//!
39//! - **Nested queries**: Each subquery can introduce its own CTEs. The scope
40//!   stack ensures inner CTE names shadow outer ones, and are properly removed
41//!   when the subquery scope ends.
42//!
43//! ## Usage Pattern
44//!
45//! Used with mz-sql-parser's `Visit` / `VisitMut` traits by overriding
46//! `visit_query`:
47//!
48//! ```ignore
49//! fn visit_query(&mut self, node: &'ast Query<Raw>) {
50//!     let names = CteScope::collect_cte_names(&node.ctes);
51//!     self.cte_scope.push(names);
52//!     visit::visit_query(self, node);  // default traversal
53//!     self.cte_scope.pop();
54//! }
55//! ```
56//!
57//! Then in `visit_table_factor`, check `self.cte_scope.is_cte(name)` before
58//! treating a single-ident reference as a database object.
59
60use std::collections::BTreeSet;
61
62use mz_sql_parser::ast::{CteBlock, Raw};
63
64/// Stack-based tracker for CTE names currently in scope.
65///
66/// Each level in the stack corresponds to one `WITH` clause. Names from all
67/// levels are visible (inner scopes shadow outer ones, though in practice
68/// CTE names don't conflict with each other — they conflict with table names).
69pub(crate) struct CteScope {
70    stack: Vec<BTreeSet<String>>,
71}
72
73impl CteScope {
74    /// Create an empty scope with no CTE names.
75    pub(crate) fn new() -> Self {
76        Self { stack: Vec::new() }
77    }
78
79    /// Push a new set of CTE names onto the scope stack.
80    ///
81    /// Call this when entering a `WITH` clause. The names remain visible
82    /// until [`pop`](Self::pop) is called.
83    pub(crate) fn push(&mut self, names: BTreeSet<String>) {
84        self.stack.push(names);
85    }
86
87    /// Pop the most recent CTE scope.
88    ///
89    /// Call this when leaving a `WITH` clause.
90    pub(crate) fn pop(&mut self) {
91        self.stack.pop();
92    }
93
94    /// Add a single CTE name to the most-recently-pushed scope level.
95    ///
96    /// Used to introduce simple-CTE names incrementally: push an empty scope on
97    /// entering the `WITH` clause, visit each CTE body, then call this after each
98    /// so that subsequent siblings (and the main query body) see the name, while
99    /// the CTE's own body resolved against the catalog. Has no effect if the
100    /// stack is empty.
101    pub(crate) fn insert_current(&mut self, name: String) {
102        if let Some(top) = self.stack.last_mut() {
103            top.insert(name);
104        }
105    }
106
107    /// Check whether `name` is a CTE in any active scope level.
108    ///
109    /// Returns `true` if the name matches a CTE at any depth in the stack.
110    /// Only unqualified single-identifier references should be checked —
111    /// multi-part names (e.g., `schema.name`) are always database object
112    /// references.
113    pub(crate) fn is_cte(&self, name: &str) -> bool {
114        self.stack.iter().any(|scope| scope.contains(name))
115    }
116
117    /// Extract all CTE names from a [`CteBlock`] into a single set.
118    ///
119    /// Appropriate for `MutuallyRecursive` blocks, where every name is visible
120    /// to every definition up front. Simple blocks must instead introduce names
121    /// incrementally via [`insert_current`](Self::insert_current); see the module
122    /// docs.
123    pub(crate) fn collect_cte_names(ctes: &CteBlock<Raw>) -> BTreeSet<String> {
124        match ctes {
125            CteBlock::Simple(ctes) => ctes.iter().map(|cte| cte.alias.name.to_string()).collect(),
126            CteBlock::MutuallyRecursive(block) => {
127                block.ctes.iter().map(|cte| cte.name.to_string()).collect()
128            }
129        }
130    }
131}
132
133#[cfg(test)]
134mod tests {
135    use super::*;
136
137    #[mz_ore::test]
138    fn test_empty_scope() {
139        let scope = CteScope::new();
140        assert!(!scope.is_cte("anything"));
141    }
142
143    #[mz_ore::test]
144    fn test_single_scope() {
145        let mut scope = CteScope::new();
146        scope.push(BTreeSet::from(["cte_a".to_string(), "cte_b".to_string()]));
147
148        assert!(scope.is_cte("cte_a"));
149        assert!(scope.is_cte("cte_b"));
150        assert!(!scope.is_cte("not_a_cte"));
151
152        scope.pop();
153        assert!(!scope.is_cte("cte_a"));
154    }
155
156    #[mz_ore::test]
157    fn test_nested_scopes() {
158        let mut scope = CteScope::new();
159
160        // Outer query WITH clause
161        scope.push(BTreeSet::from(["outer_cte".to_string()]));
162        assert!(scope.is_cte("outer_cte"));
163
164        // Inner subquery WITH clause
165        scope.push(BTreeSet::from(["inner_cte".to_string()]));
166        assert!(scope.is_cte("outer_cte")); // still visible
167        assert!(scope.is_cte("inner_cte"));
168
169        // Leave inner scope
170        scope.pop();
171        assert!(scope.is_cte("outer_cte"));
172        assert!(!scope.is_cte("inner_cte")); // no longer visible
173
174        // Leave outer scope
175        scope.pop();
176        assert!(!scope.is_cte("outer_cte"));
177    }
178
179    #[mz_ore::test]
180    fn test_empty_push() {
181        let mut scope = CteScope::new();
182        scope.push(BTreeSet::new());
183        assert!(!scope.is_cte("anything"));
184        scope.pop();
185    }
186}