mz_sql/ast/
transform.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Provides a publicly available interface to transform our SQL ASTs.
11
12use std::collections::{BTreeMap, BTreeSet};
13
14use mz_ore::str::StrExt;
15use mz_repr::CatalogItemId;
16use mz_sql_parser::ast::CreateTableFromSourceStatement;
17
18use crate::ast::visit::{self, Visit};
19use crate::ast::visit_mut::{self, VisitMut};
20use crate::ast::{
21    AstInfo, CreateConnectionStatement, CreateIndexStatement, CreateMaterializedViewStatement,
22    CreateSecretStatement, CreateSinkStatement, CreateSourceStatement, CreateSubsourceStatement,
23    CreateTableStatement, CreateViewStatement, CreateWebhookSourceStatement, Expr, Ident, Query,
24    Raw, RawItemName, Statement, UnresolvedItemName, ViewDefinition,
25};
26use crate::names::FullItemName;
27
28/// Given a [`Statement`] rewrites all references of the schema name `cur_schema_name` to
29/// `new_schema_name`.
30pub fn create_stmt_rename_schema_refs(
31    create_stmt: &mut Statement<Raw>,
32    database: &str,
33    cur_schema: &str,
34    new_schema: &str,
35) -> Result<(), (String, String)> {
36    match create_stmt {
37        stmt @ Statement::CreateConnection(_)
38        | stmt @ Statement::CreateDatabase(_)
39        | stmt @ Statement::CreateSchema(_)
40        | stmt @ Statement::CreateWebhookSource(_)
41        | stmt @ Statement::CreateSource(_)
42        | stmt @ Statement::CreateSubsource(_)
43        | stmt @ Statement::CreateSink(_)
44        | stmt @ Statement::CreateView(_)
45        | stmt @ Statement::CreateMaterializedView(_)
46        | stmt @ Statement::CreateTable(_)
47        | stmt @ Statement::CreateTableFromSource(_)
48        | stmt @ Statement::CreateIndex(_)
49        | stmt @ Statement::CreateType(_)
50        | stmt @ Statement::CreateSecret(_) => {
51            let mut visitor = CreateSqlRewriteSchema {
52                database,
53                cur_schema,
54                new_schema,
55                error: None,
56            };
57            visitor.visit_statement_mut(stmt);
58
59            if let Some(e) = visitor.error.take() {
60                Err(e)
61            } else {
62                Ok(())
63            }
64        }
65        stmt => {
66            unreachable!("Internal error: only catalog items need to update item refs. {stmt:?}")
67        }
68    }
69}
70
/// AST visitor that renames a schema in every fully qualified name it encounters.
struct CreateSqlRewriteSchema<'a> {
    /// Database component a three-part name must carry for it to be rewritten.
    database: &'a str,
    /// Schema name being replaced.
    cur_schema: &'a str,
    /// Schema name written in place of `cur_schema`.
    new_schema: &'a str,
    /// The first `(schema, item)` two-part name seen whose schema equals `cur_schema`,
    /// if any. Such names are reported instead of rewritten.
    error: Option<(String, String)>,
}
77
78impl<'a> CreateSqlRewriteSchema<'a> {
79    fn maybe_rewrite_idents(&mut self, name: &mut [Ident]) {
80        match name {
81            [schema, item] if schema.as_str() == self.cur_schema => {
82                // TODO(parkmycar): I _think_ when the database component is not specified we can
83                // always infer we're using the current database. But I'm not positive, so for now
84                // we'll bail in this case.
85                if self.error.is_none() {
86                    self.error = Some((schema.to_string(), item.to_string()));
87                }
88            }
89            [database, schema, _item] => {
90                if database.as_str() == self.database && schema.as_str() == self.cur_schema {
91                    *schema = Ident::new_unchecked(self.new_schema);
92                }
93            }
94            _ => (),
95        }
96    }
97}
98
99impl<'a, 'ast> VisitMut<'ast, Raw> for CreateSqlRewriteSchema<'a> {
100    fn visit_expr_mut(&mut self, e: &'ast mut Expr<Raw>) {
101        match e {
102            Expr::Identifier(id) => {
103                // The last ID component is a column name that should not be
104                // considered in the rewrite.
105                let i = id.len() - 1;
106                self.maybe_rewrite_idents(&mut id[..i]);
107            }
108            Expr::QualifiedWildcard(id) => {
109                self.maybe_rewrite_idents(id);
110            }
111            _ => visit_mut::visit_expr_mut(self, e),
112        }
113    }
114
115    fn visit_unresolved_item_name_mut(
116        &mut self,
117        unresolved_item_name: &'ast mut UnresolvedItemName,
118    ) {
119        self.maybe_rewrite_idents(&mut unresolved_item_name.0);
120    }
121
122    fn visit_item_name_mut(
123        &mut self,
124        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
125    ) {
126        match item_name {
127            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.maybe_rewrite_idents(&mut n.0),
128        }
129    }
130}
131
132/// Changes the `name` used in an item's `CREATE` statement. To complete a
133/// rename operation, you must also call `create_stmt_rename_refs` on all dependent
134/// items.
135pub fn create_stmt_rename(create_stmt: &mut Statement<Raw>, to_item_name: String) {
136    // TODO(sploiselle): Support renaming schemas and databases.
137    match create_stmt {
138        Statement::CreateIndex(CreateIndexStatement { name, .. }) => {
139            *name = Some(Ident::new_unchecked(to_item_name));
140        }
141        Statement::CreateSink(CreateSinkStatement {
142            name: Some(name), ..
143        })
144        | Statement::CreateSource(CreateSourceStatement { name, .. })
145        | Statement::CreateSubsource(CreateSubsourceStatement { name, .. })
146        | Statement::CreateView(CreateViewStatement {
147            definition: ViewDefinition { name, .. },
148            ..
149        })
150        | Statement::CreateMaterializedView(CreateMaterializedViewStatement { name, .. })
151        | Statement::CreateTable(CreateTableStatement { name, .. })
152        | Statement::CreateTableFromSource(CreateTableFromSourceStatement { name, .. })
153        | Statement::CreateSecret(CreateSecretStatement { name, .. })
154        | Statement::CreateConnection(CreateConnectionStatement { name, .. })
155        | Statement::CreateWebhookSource(CreateWebhookSourceStatement { name, .. }) => {
156            // The last name in an ItemName is the item name. The item name
157            // does not have a fixed index.
158            // TODO: https://github.com/MaterializeInc/database-issues/issues/1721
159            let item_name_len = name.0.len() - 1;
160            name.0[item_name_len] = Ident::new_unchecked(to_item_name);
161        }
162        item => unreachable!("Internal error: only catalog items can be renamed {item:?}"),
163    }
164}
165
166/// Updates all references of `from_name` in `create_stmt` to `to_name` or
167/// errors if request is ambiguous.
168///
169/// Requests are considered ambiguous if `create_stmt` is a
170/// `Statement::CreateView`, and any of the following apply to its `query`:
171/// - `to_name.item` is used as an [`Ident`] in `query`.
172/// - `from_name.item` does not unambiguously refer to an item in the query,
173///   e.g. it is also used as a schema, or not all references to the item are
174///   sufficiently qualified.
175/// - `to_name.item` does not unambiguously refer to an item in the query after
176///   the rename. Right now, given the first condition, this is just a coherence
177///   check, but will be more meaningful once the first restriction is lifted.
178pub fn create_stmt_rename_refs(
179    create_stmt: &mut Statement<Raw>,
180    from_name: FullItemName,
181    to_item_name: String,
182) -> Result<(), String> {
183    let from_item = UnresolvedItemName::from(from_name.clone());
184    let maybe_update_item_name = |item_name: &mut UnresolvedItemName| {
185        if item_name.0 == from_item.0 {
186            // The last name in an ItemName is the item name. The item name
187            // does not have a fixed index.
188            // TODO: https://github.com/MaterializeInc/database-issues/issues/1721
189            let item_name_len = item_name.0.len() - 1;
190            item_name.0[item_name_len] = Ident::new_unchecked(to_item_name.clone());
191        }
192    };
193
194    // TODO(sploiselle): Support renaming schemas and databases.
195    match create_stmt {
196        Statement::CreateIndex(CreateIndexStatement { on_name, .. }) => {
197            maybe_update_item_name(on_name.name_mut());
198        }
199        Statement::CreateSink(CreateSinkStatement { from, .. }) => {
200            maybe_update_item_name(from.name_mut());
201        }
202        Statement::CreateView(CreateViewStatement {
203            definition: ViewDefinition { query, .. },
204            ..
205        }) => {
206            rewrite_query(from_name, to_item_name, query)?;
207        }
208        Statement::CreateMaterializedView(CreateMaterializedViewStatement {
209            replacing,
210            query,
211            ..
212        }) => {
213            if let Some(target) = replacing {
214                maybe_update_item_name(target.name_mut());
215            }
216            rewrite_query(from_name, to_item_name, query)?;
217        }
218        Statement::CreateSource(_)
219        | Statement::CreateSubsource(_)
220        | Statement::CreateTable(_)
221        | Statement::CreateTableFromSource(_)
222        | Statement::CreateSecret(_)
223        | Statement::CreateConnection(_)
224        | Statement::CreateWebhookSource(_) => {}
225        item => {
226            unreachable!("Internal error: only catalog items need to update item refs {item:?}")
227        }
228    }
229
230    Ok(())
231}
232
233/// Rewrites `query`'s references of `from` to `to` or errors if too ambiguous.
234fn rewrite_query(from: FullItemName, to: String, query: &mut Query<Raw>) -> Result<(), String> {
235    let from_ident = Ident::new_unchecked(from.item.clone());
236    let to_ident = Ident::new_unchecked(to);
237    let qual_depth =
238        QueryIdentAgg::determine_qual_depth(&from_ident, Some(to_ident.clone()), query)?;
239    CreateSqlRewriter::rewrite_query_with_qual_depth(from, to_ident.clone(), qual_depth, query);
240    // Ensure that our rewrite didn't didn't introduce ambiguous
241    // references to `to_name`.
242    match QueryIdentAgg::determine_qual_depth(&to_ident, None, query) {
243        Ok(_) => Ok(()),
244        Err(e) => Err(e),
245    }
246}
247
248fn ambiguous_err(n: &Ident, t: &str) -> String {
249    format!(
250        "{} potentially used ambiguously as item and {}",
251        n.as_str().quoted(),
252        t
253    )
254}
255
256/// Visits a [`Query`], assessing catalog item [`Ident`]s' use of a specified `Ident`.
257struct QueryIdentAgg<'a> {
258    /// The name whose usage you want to assess.
259    name: &'a Ident,
260    /// Tracks all second-level qualifiers used on `name` in a `BTreeMap`, as
261    /// well as any third-level qualifiers used on those second-level qualifiers
262    /// in a `BTreeSet`.
263    qualifiers: BTreeMap<Ident, BTreeSet<Ident>>,
264    /// Tracks the least qualified instance of `name` seen.
265    min_qual_depth: usize,
266    /// Provides an option to fail the visit if encounters a specified `Ident`.
267    fail_on: Option<Ident>,
268    err: Option<String>,
269}
270
271impl<'a> QueryIdentAgg<'a> {
272    /// Determines the depth of qualification needed to unambiguously reference
273    /// catalog items in a [`Query`].
274    ///
275    /// Includes an option to fail if a given `Ident` is encountered.
276    ///
277    /// `Result`s of `Ok(usize)` indicate that `name` can be unambiguously
278    /// referred to with `usize` parts, e.g. 2 requires schema and item name
279    /// qualification.
280    ///
281    /// `Result`s of `Err` indicate that we cannot unambiguously reference
282    /// `name` or encountered `fail_on`, if it's provided.
283    fn determine_qual_depth(
284        name: &Ident,
285        fail_on: Option<Ident>,
286        query: &Query<Raw>,
287    ) -> Result<usize, String> {
288        let mut v = QueryIdentAgg {
289            qualifiers: BTreeMap::new(),
290            min_qual_depth: usize::MAX,
291            err: None,
292            name,
293            fail_on,
294        };
295
296        // Aggregate identities in `v`.
297        v.visit_query(query);
298        // Not possible to have a qualification depth of 0;
299        assert!(v.min_qual_depth > 0);
300
301        if let Some(e) = v.err {
302            return Err(e);
303        }
304
305        // Check if there was more than one 3rd-level (e.g.
306        // database) qualification used for any reference to `name`.
307        let req_depth = if v.qualifiers.values().any(|v| v.len() > 1) {
308            3
309        // Check if there was more than one 2nd-level (e.g. schema)
310        // qualification used for any reference to `name`.
311        } else if v.qualifiers.len() > 1 {
312            2
313        } else {
314            1
315        };
316
317        if v.min_qual_depth < req_depth {
318            Err(format!(
319                "{} is not sufficiently qualified to support renaming",
320                name.as_str().quoted()
321            ))
322        } else {
323            Ok(req_depth)
324        }
325    }
326
327    // Assesses `v` for uses of `self.name` and `self.fail_on`.
328    fn check_failure(&mut self, v: &[Ident]) {
329        // Fail if we encounter `self.fail_on`.
330        if let Some(f) = &self.fail_on {
331            if v.iter().any(|i| i == f) {
332                self.err = Some(format!(
333                    "found reference to {}; cannot rename {} to any identity \
334                    used in any existing view definitions",
335                    f.as_str().quoted(),
336                    self.name.as_str().quoted()
337                ));
338            }
339        }
340    }
341}
342
343impl<'a, 'ast> Visit<'ast, Raw> for QueryIdentAgg<'a> {
344    fn visit_expr(&mut self, e: &'ast Expr<Raw>) {
345        match e {
346            Expr::Identifier(i) => {
347                self.check_failure(i);
348                if let Some(p) = i.iter().rposition(|e| e == self.name) {
349                    if p == i.len() - 1 {
350                        // `self.name` used as a column if it's in the final
351                        // position here, e.g. `SELECT view.col FROM ...`
352                        self.err = Some(ambiguous_err(self.name, "column"));
353                        return;
354                    }
355                    self.min_qual_depth = std::cmp::min(p + 1, self.min_qual_depth);
356                }
357            }
358            Expr::QualifiedWildcard(i) => {
359                self.check_failure(i);
360                if let Some(p) = i.iter().rposition(|e| e == self.name) {
361                    self.min_qual_depth = std::cmp::min(p + 1, self.min_qual_depth);
362                }
363            }
364            _ => visit::visit_expr(self, e),
365        }
366    }
367
368    fn visit_ident(&mut self, ident: &'ast Ident) {
369        self.check_failure(std::slice::from_ref(ident));
370        // This is an unqualified item using `self.name`, e.g. an alias, which
371        // we cannot unambiguously resolve.
372        if ident == self.name {
373            self.err = Some(ambiguous_err(self.name, "alias or column"));
374        }
375    }
376
377    fn visit_unresolved_item_name(&mut self, unresolved_item_name: &'ast UnresolvedItemName) {
378        let names = &unresolved_item_name.0;
379        self.check_failure(names);
380        // Every item is used as an `ItemName` at least once, which
381        // lets use track all items named `self.name`.
382        if let Some(p) = names.iter().rposition(|e| e == self.name) {
383            // Name used as last element of `<db>.<schema>.<item>`
384            if p == names.len() - 1 && names.len() == 3 {
385                self.qualifiers
386                    .entry(names[1].clone())
387                    .or_default()
388                    .insert(names[0].clone());
389                self.min_qual_depth = std::cmp::min(3, self.min_qual_depth);
390            } else {
391                // Any other use is a database or schema
392                self.err = Some(ambiguous_err(self.name, "database, schema, or function"))
393            }
394        }
395    }
396
397    fn visit_item_name(&mut self, item_name: &'ast <Raw as AstInfo>::ItemName) {
398        match item_name {
399            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.visit_unresolved_item_name(n),
400        }
401    }
402}
403
404struct CreateSqlRewriter {
405    from: Vec<Ident>,
406    to: Ident,
407}
408
409impl CreateSqlRewriter {
410    fn rewrite_query_with_qual_depth(
411        from_name: FullItemName,
412        to_name: Ident,
413        qual_depth: usize,
414        query: &mut Query<Raw>,
415    ) {
416        let from = match qual_depth {
417            1 => vec![Ident::new_unchecked(from_name.item)],
418            2 => vec![
419                Ident::new_unchecked(from_name.schema),
420                Ident::new_unchecked(from_name.item),
421            ],
422            3 => vec![
423                Ident::new_unchecked(from_name.database.to_string()),
424                Ident::new_unchecked(from_name.schema),
425                Ident::new_unchecked(from_name.item),
426            ],
427            _ => unreachable!(),
428        };
429        let mut v = CreateSqlRewriter { from, to: to_name };
430        v.visit_query_mut(query);
431    }
432
433    fn maybe_rewrite_idents(&self, name: &mut [Ident]) {
434        if name.len() > 0 && name.ends_with(&self.from) {
435            name[name.len() - 1] = self.to.clone();
436        }
437    }
438}
439
440impl<'ast> VisitMut<'ast, Raw> for CreateSqlRewriter {
441    fn visit_expr_mut(&mut self, e: &'ast mut Expr<Raw>) {
442        match e {
443            Expr::Identifier(id) => {
444                // The last ID component is a column name that should not be
445                // considered in the rewrite.
446                let i = id.len() - 1;
447                self.maybe_rewrite_idents(&mut id[..i]);
448            }
449            Expr::QualifiedWildcard(id) => {
450                self.maybe_rewrite_idents(id);
451            }
452            _ => visit_mut::visit_expr_mut(self, e),
453        }
454    }
455    fn visit_unresolved_item_name_mut(
456        &mut self,
457        unresolved_item_name: &'ast mut UnresolvedItemName,
458    ) {
459        self.maybe_rewrite_idents(&mut unresolved_item_name.0);
460    }
461    fn visit_item_name_mut(
462        &mut self,
463        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
464    ) {
465        match item_name {
466            RawItemName::Name(n) | RawItemName::Id(_, n, _) => self.maybe_rewrite_idents(&mut n.0),
467        }
468    }
469}
470
471/// Updates all `CatalogItemId`s from the keys of `ids` to the values of `ids` within `create_stmt`.
472pub fn create_stmt_replace_ids(
473    create_stmt: &mut Statement<Raw>,
474    ids: &BTreeMap<CatalogItemId, CatalogItemId>,
475) {
476    let mut id_replacer = CreateSqlIdReplacer { ids };
477    id_replacer.visit_statement_mut(create_stmt);
478}
479
480struct CreateSqlIdReplacer<'a> {
481    ids: &'a BTreeMap<CatalogItemId, CatalogItemId>,
482}
483
484impl<'ast> VisitMut<'ast, Raw> for CreateSqlIdReplacer<'_> {
485    fn visit_item_name_mut(
486        &mut self,
487        item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
488    ) {
489        match item_name {
490            RawItemName::Id(id, _, _) => {
491                let old_id = match id.parse() {
492                    Ok(old_id) => old_id,
493                    Err(_) => panic!("invalid persisted global id {id}"),
494                };
495                if let Some(new_id) = self.ids.get(&old_id) {
496                    *id = new_id.to_string();
497                }
498            }
499            RawItemName::Name(_) => {}
500        }
501    }
502}