mz_deploy/project/resolve/cte_scope.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! CTE scope tracking for SQL AST visitors.
11//!
12//! Common Table Expressions (CTEs) introduce names that shadow database objects.
13//! When traversing a SQL AST to collect dependencies, extract aliases, or
14//! transform names, CTE references must be distinguished from real object
15//! references. This module provides [`CteScope`], a stack-based tracker that
16//! manages CTE name visibility across nested queries.
17//!
18//! ## Scoping Rules
19//!
20//! - **Simple CTEs** (`WITH a AS (...), b AS (...) SELECT ...`): names are
21//! introduced incrementally, left to right. Each simple CTE's body sees only
22//! the CTEs declared *before* it; the name of CTE `i` is not in scope while
23//! visiting CTE `i`'s own body. All names are visible to the main query body.
24//! This mirrors Materialize's name resolver (`fold_query` in
25//! `src/sql/src/names.rs`, `plan_ctes` in `src/sql/src/plan/query.rs`), which
26//! resolves each `CteBlock::Simple` body before inserting that CTE's name. As
27//! a result a simple CTE whose name shadows a catalog object can still
28//! reference that object inside its own definition, e.g.
29//! `WITH products AS (SELECT * FROM products WHERE active) SELECT * FROM products`
30//! — the inner `products` is the catalog table, the outer is the CTE. Callers
31//! drive this by pushing an empty scope, then visiting each body and calling
32//! [`insert_current`](CteScope::insert_current) after each.
33//!
34//! - **Mutually Recursive CTEs** (`WITH MUTUALLY RECURSIVE a AS (...), b AS (...)
35//! SELECT ...`): All CTE names are visible to all CTE definitions and the main
36//! query body. Self-references and forward references are valid, so all names
37//! are pushed up front.
38//!
39//! - **Nested queries**: Each subquery can introduce its own CTEs. The scope
40//! stack ensures inner CTE names shadow outer ones, and are properly removed
41//! when the subquery scope ends.
42//!
43//! ## Usage Pattern
44//!
45//! Used with mz-sql-parser's `Visit` / `VisitMut` traits by overriding
46//! `visit_query`:
47//!
48//! ```ignore
49//! fn visit_query(&mut self, node: &'ast Query<Raw>) {
50//! let names = CteScope::collect_cte_names(&node.ctes);
51//! self.cte_scope.push(names);
52//! visit::visit_query(self, node); // default traversal
53//! self.cte_scope.pop();
54//! }
55//! ```
56//!
57//! Then in `visit_table_factor`, check `self.cte_scope.is_cte(name)` before
58//! treating a single-ident reference as a database object.
59
60use std::collections::BTreeSet;
61
62use mz_sql_parser::ast::{CteBlock, Raw};
63
64/// Stack-based tracker for CTE names currently in scope.
65///
66/// Each level in the stack corresponds to one `WITH` clause. Names from all
67/// levels are visible (inner scopes shadow outer ones, though in practice
68/// CTE names don't conflict with each other — they conflict with table names).
69pub(crate) struct CteScope {
70 stack: Vec<BTreeSet<String>>,
71}
72
73impl CteScope {
74 /// Create an empty scope with no CTE names.
75 pub(crate) fn new() -> Self {
76 Self { stack: Vec::new() }
77 }
78
79 /// Push a new set of CTE names onto the scope stack.
80 ///
81 /// Call this when entering a `WITH` clause. The names remain visible
82 /// until [`pop`](Self::pop) is called.
83 pub(crate) fn push(&mut self, names: BTreeSet<String>) {
84 self.stack.push(names);
85 }
86
87 /// Pop the most recent CTE scope.
88 ///
89 /// Call this when leaving a `WITH` clause.
90 pub(crate) fn pop(&mut self) {
91 self.stack.pop();
92 }
93
94 /// Add a single CTE name to the most-recently-pushed scope level.
95 ///
96 /// Used to introduce simple-CTE names incrementally: push an empty scope on
97 /// entering the `WITH` clause, visit each CTE body, then call this after each
98 /// so that subsequent siblings (and the main query body) see the name, while
99 /// the CTE's own body resolved against the catalog. Has no effect if the
100 /// stack is empty.
101 pub(crate) fn insert_current(&mut self, name: String) {
102 if let Some(top) = self.stack.last_mut() {
103 top.insert(name);
104 }
105 }
106
107 /// Check whether `name` is a CTE in any active scope level.
108 ///
109 /// Returns `true` if the name matches a CTE at any depth in the stack.
110 /// Only unqualified single-identifier references should be checked —
111 /// multi-part names (e.g., `schema.name`) are always database object
112 /// references.
113 pub(crate) fn is_cte(&self, name: &str) -> bool {
114 self.stack.iter().any(|scope| scope.contains(name))
115 }
116
117 /// Extract all CTE names from a [`CteBlock`] into a single set.
118 ///
119 /// Appropriate for `MutuallyRecursive` blocks, where every name is visible
120 /// to every definition up front. Simple blocks must instead introduce names
121 /// incrementally via [`insert_current`](Self::insert_current); see the module
122 /// docs.
123 pub(crate) fn collect_cte_names(ctes: &CteBlock<Raw>) -> BTreeSet<String> {
124 match ctes {
125 CteBlock::Simple(ctes) => ctes.iter().map(|cte| cte.alias.name.to_string()).collect(),
126 CteBlock::MutuallyRecursive(block) => {
127 block.ctes.iter().map(|cte| cte.name.to_string()).collect()
128 }
129 }
130 }
131}
132
133#[cfg(test)]
134mod tests {
135 use super::*;
136
137 #[mz_ore::test]
138 fn test_empty_scope() {
139 let scope = CteScope::new();
140 assert!(!scope.is_cte("anything"));
141 }
142
143 #[mz_ore::test]
144 fn test_single_scope() {
145 let mut scope = CteScope::new();
146 scope.push(BTreeSet::from(["cte_a".to_string(), "cte_b".to_string()]));
147
148 assert!(scope.is_cte("cte_a"));
149 assert!(scope.is_cte("cte_b"));
150 assert!(!scope.is_cte("not_a_cte"));
151
152 scope.pop();
153 assert!(!scope.is_cte("cte_a"));
154 }
155
156 #[mz_ore::test]
157 fn test_nested_scopes() {
158 let mut scope = CteScope::new();
159
160 // Outer query WITH clause
161 scope.push(BTreeSet::from(["outer_cte".to_string()]));
162 assert!(scope.is_cte("outer_cte"));
163
164 // Inner subquery WITH clause
165 scope.push(BTreeSet::from(["inner_cte".to_string()]));
166 assert!(scope.is_cte("outer_cte")); // still visible
167 assert!(scope.is_cte("inner_cte"));
168
169 // Leave inner scope
170 scope.pop();
171 assert!(scope.is_cte("outer_cte"));
172 assert!(!scope.is_cte("inner_cte")); // no longer visible
173
174 // Leave outer scope
175 scope.pop();
176 assert!(!scope.is_cte("outer_cte"));
177 }
178
179 #[mz_ore::test]
180 fn test_empty_push() {
181 let mut scope = CteScope::new();
182 scope.push(BTreeSet::new());
183 assert!(!scope.is_cte("anything"));
184 scope.pop();
185 }
186}