mz_sql_parser/ast/metadata.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License in the LICENSE file at the
6// root of this repository, or online at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use std::fmt::{self, Debug};
17use std::hash::Hash;
18
19use crate::ast::display::{self, AstDisplay, AstFormatter};
20use crate::ast::fold::{Fold, FoldNode};
21use crate::ast::{
22 Ident, Statement, UnresolvedDatabaseName, UnresolvedItemName, UnresolvedObjectName,
23 UnresolvedSchemaName, Version,
24};
25
/// This represents the metadata that lives next to an AST, as we take it through
/// various stages in the planning process.
///
/// Conceptually, when we first receive an AST from the parsing process, it only
/// represents the syntax that the user input, and has no semantic information
/// embedded in it. Later in this process, we want to be able to walk the tree
/// and add additional information to it piecemeal, perhaps without going down
/// the full planning pipeline. AstInfo represents various bits of information
/// that get stored in the tree: for instance, at first, table names are only
/// represented by the names the user input (in the `Raw` implementor of this
/// trait), but later on, we replace them with the name together with the ID
/// that it gets resolved to.
///
/// Currently this process brings an `Ast<Raw>` to `Ast<Aug>`, and lives in
/// sql/src/names.rs:resolve.
pub trait AstInfo: Clone {
    /// The type used for nested statements.
    type NestedStatement: AstDisplay + Clone + Hash + Debug + Eq;
    /// The type used for item references. Items are the subset of objects that are namespaced by a
    /// database and schema.
    type ItemName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used to specify a column.
    ///
    /// n.b. when implementing visitors, you likely want to build the visitor to
    /// visit [`crate::ast::ColumnName`] instead of visiting this struct
    /// directly. The visitor on this should usually just return an error.
    type ColumnReference: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for schema names.
    type SchemaName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for database names.
    type DatabaseName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for cluster names.
    type ClusterName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for data types.
    type DataType: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type stored next to CTEs for their assigned ID.
    type CteId: Clone + Hash + Debug + Eq + Ord;
    /// The type used for role references.
    type RoleName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for network policy references.
    type NetworkPolicyName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
    /// The type used for any object names. Objects are the superset of all objects in Materialize.
    type ObjectName: AstDisplay + Clone + Hash + Debug + Eq + Ord;
}
70
/// Marker type selecting the "raw" flavor of [`AstInfo`].
///
/// It carries no data; its `AstInfo` implementation binds the associated types
/// to unresolved / raw representations such as [`RawItemName`] and
/// [`UnresolvedSchemaName`].
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Default)]
pub struct Raw;
73
/// Binds each [`AstInfo`] associated type to the representation used by the
/// `Raw` flavor of the AST.
impl AstInfo for Raw {
    // Nested statements are themselves raw statements.
    type NestedStatement = Statement<Raw>;
    type ItemName = RawItemName;
    // Columns and roles are referenced by a bare identifier.
    type ColumnReference = Ident;
    type SchemaName = UnresolvedSchemaName;
    type DatabaseName = UnresolvedDatabaseName;
    type ClusterName = RawClusterName;
    type DataType = RawDataType;
    // The unit type: this flavor stores no information alongside CTEs.
    type CteId = ();
    type RoleName = Ident;
    type NetworkPolicyName = RawNetworkPolicyName;
    type ObjectName = UnresolvedObjectName;
}
87
/// An item reference as stored in a `Raw` AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawItemName {
    /// An item referenced by name only; displayed as that name.
    Name(UnresolvedItemName),
    /// An item referenced by an ID string, together with a name and an
    /// optional version. Displayed as `[<id> AS <name>]`, with
    /// ` VERSION <version>` inserted before the closing bracket when a version
    /// is present.
    Id(String, UnresolvedItemName, Option<Version>),
}
93
94impl RawItemName {
95 pub fn name(&self) -> &UnresolvedItemName {
96 match self {
97 RawItemName::Name(name) => name,
98 RawItemName::Id(_, name, _) => name,
99 }
100 }
101
102 pub fn name_mut(&mut self) -> &mut UnresolvedItemName {
103 match self {
104 RawItemName::Name(name) => name,
105 RawItemName::Id(_, name, _) => name,
106 }
107 }
108}
109
110impl AstDisplay for RawItemName {
111 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
112 match self {
113 RawItemName::Name(o) => f.write_node(o),
114 RawItemName::Id(id, o, v) => {
115 // `id` is parsed from an identifier token (`[<id> AS …]`), so a
116 // crafted id with spaces/keywords must be quoted to reparse as a
117 // single identifier. A normal global id like `u1` is printed
118 // bare in every mode — including the stable mode `pg_get_viewdef`
119 // renders in, which would otherwise force-quote it.
120 f.write_str("[");
121 let id = Ident::new_unchecked(id.clone());
122 if id.can_be_printed_bare() {
123 f.write_str(id.as_str());
124 } else {
125 f.write_node(&id);
126 }
127 f.write_str(" AS ");
128 f.write_node(o);
129 if let Some(v) = v {
130 f.write_str(" VERSION ");
131 f.write_node(v);
132 }
133 f.write_str("]");
134 }
135 }
136 }
137}
138impl_display!(RawItemName);
139
140impl<T> FoldNode<Raw, T> for RawItemName
141where
142 T: AstInfo,
143{
144 type Folded = T::ItemName;
145
146 fn fold<F>(self, f: &mut F) -> Self::Folded
147 where
148 F: Fold<Raw, T>,
149 {
150 f.fold_item_name(self)
151 }
152}
153
/// A cluster reference as stored in a `Raw` AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawClusterName {
    /// A cluster referenced by identifier; displayed as that identifier.
    Unresolved(Ident),
    /// A cluster referenced by an ID string; displayed as `[<id>]`.
    // NOTE(review): presumably the ID of an already-resolved cluster — confirm.
    Resolved(String),
}
159
160impl AstDisplay for RawClusterName {
161 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
162 match self {
163 RawClusterName::Unresolved(id) => f.write_node(id),
164 RawClusterName::Resolved(id) => {
165 f.write_str(format!("[{}]", id));
166 }
167 }
168 }
169}
170impl_display!(RawClusterName);
171
172impl<T> FoldNode<Raw, T> for RawClusterName
173where
174 T: AstInfo,
175{
176 type Folded = T::ClusterName;
177
178 fn fold<F>(self, f: &mut F) -> Self::Folded
179 where
180 F: Fold<Raw, T>,
181 {
182 f.fold_cluster_name(self)
183 }
184}
185
/// A network policy reference as stored in a `Raw` AST.
#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone)]
pub enum RawNetworkPolicyName {
    /// A network policy referenced by identifier; displayed as that identifier.
    Unresolved(Ident),
    /// A network policy referenced by an ID string; displayed as `[<id>]`.
    // NOTE(review): presumably the ID of an already-resolved policy — confirm.
    Resolved(String),
}
191
192impl AstDisplay for RawNetworkPolicyName {
193 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
194 match self {
195 RawNetworkPolicyName::Unresolved(id) => f.write_node(id),
196 RawNetworkPolicyName::Resolved(id) => {
197 f.write_str(format!("[{}]", id));
198 }
199 }
200 }
201}
202impl_display!(RawNetworkPolicyName);
203
204impl<T> FoldNode<Raw, T> for RawNetworkPolicyName
205where
206 T: AstInfo,
207{
208 type Folded = T::NetworkPolicyName;
209
210 fn fold<F>(self, f: &mut F) -> Self::Folded
211 where
212 F: Fold<Raw, T>,
213 {
214 f.fold_network_policy_name(self)
215 }
216}
217
/// SQL data types as stored in a `Raw` AST.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum RawDataType {
    /// Array of the boxed element type; displayed as `<type>[]`.
    Array(Box<RawDataType>),
    /// List of the boxed element type; displayed as `<type> list`.
    List(Box<RawDataType>),
    /// Map; displayed as `map[<key_type>=><value_type>]`.
    Map {
        key_type: Box<RawDataType>,
        value_type: Box<RawDataType>,
    },
    /// Types that don't embed other types, e.g. INT.
    Other {
        name: RawItemName,
        /// Type modifiers appended to the type name, e.g. `numeric(38,0)`.
        typ_mod: Vec<i64>,
    },
}
237
238impl AstDisplay for RawDataType {
239 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
240 match self {
241 RawDataType::Array(ty) => {
242 f.write_node(&ty);
243 f.write_str("[]");
244 }
245 RawDataType::List(ty) => {
246 f.write_node(&ty);
247 f.write_str(" list");
248 }
249 RawDataType::Map {
250 key_type,
251 value_type,
252 } => {
253 f.write_str("map[");
254 f.write_node(&key_type);
255 f.write_str("=>");
256 f.write_node(&value_type);
257 f.write_str("]");
258 }
259 RawDataType::Other { name, typ_mod } => {
260 // If the first component of the type name clashes with a
261 // keyword that `parse_data_type` either dispatches into a
262 // special grammar (`map[...]`) or canonicalizes to a
263 // different spelling (`string` → `text`, `bigint` → `int8`,
264 // …), an unquoted emit would reparse to a different AST —
265 // either the canonicalizing branch fires and replaces the
266 // name outright, or (for a qualified name) the dispatch
267 // consumes the first part before the rest reaches the
268 // type-name parser. Force the always-quoted stable form in
269 // those cases. Keywords whose canonicalized name matches
270 // the keyword text itself (`bpchar`, `varchar`, `time`,
271 // `timestamp`, `timestamptz`) round-trip unquoted via the
272 // keyword path, so they're omitted here.
273 let first_ident_clashes = name
274 .name()
275 .0
276 .first()
277 .and_then(|id| id.as_keyword())
278 .map(data_type_keyword_needs_quoting)
279 .unwrap_or(false);
280 if first_ident_clashes {
281 f.write_str(&name.to_ast_string_stable());
282 } else {
283 f.write_node(name);
284 }
285 if typ_mod.len() > 0 {
286 f.write_str("(");
287 f.write_node(&display::comma_separated(typ_mod));
288 f.write_str(")");
289 }
290 }
291 }
292 }
293}
294impl_display!(RawDataType);
295
296/// Reports whether `kw`, as the first component of a data type name, would be
297/// reparsed differently by `Parser::parse_data_type` — either
298/// dispatched into a special grammar (`map[...]`) or canonicalized to a
299/// different spelling (`string` → `text`, `bigint` → `int8`, …). Such names
300/// must be emitted always-quoted to round-trip.
301///
302/// Keywords that `parse_data_type` parses back to themselves verbatim
303/// (`bpchar`, `varchar`, `time`, `timestamp`, `timestamptz`) round-trip
304/// unquoted and are intentionally excluded. Keep this in sync with the keyword
305/// arms of `parse_data_type`.
306fn data_type_keyword_needs_quoting(kw: mz_sql_lexer::keywords::Keyword) -> bool {
307 use mz_sql_lexer::keywords::*;
308 matches!(
309 kw,
310 MAP | STRING
311 | BIGINT
312 | SMALLINT
313 | DEC
314 | DECIMAL
315 | DOUBLE
316 | FLOAT
317 | INT
318 | INTEGER
319 | REAL
320 | BOOLEAN
321 | BYTES
322 | JSON
323 | CHAR
324 | CHARACTER
325 )
326}
327
328impl<T> FoldNode<Raw, T> for RawDataType
329where
330 T: AstInfo,
331{
332 type Folded = T::DataType;
333
334 fn fold<F>(self, f: &mut F) -> Self::Folded
335 where
336 F: Fold<Raw, T>,
337 {
338 f.fold_data_type(self)
339 }
340}