mz_sql_parser/ast/defs/name.rs
1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13// http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::Keyword;
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
30/// An identifier.
31#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
32pub struct Ident(pub(crate) String);
33
34impl Ident {
35 /// Maximum length of an identifier in Materialize.
36 pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
37
38 /// Create a new [`Ident`] with the given value, checking our invariants.
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// use mz_sql_parser::ast::Ident;
44 ///
45 /// let id = Ident::new("hello_world").unwrap();
46 /// assert_eq!(id.as_str(), "hello_world");
47 ///
48 /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
49 /// 😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈😊😁😅😂😬🍻😮💨😮🗽🛰️🌈";
50 /// assert_eq!(too_long.len(), 258);
51 ///
52 /// let too_long_id = Ident::new(too_long);
53 /// assert!(too_long_id.is_err());
54 ///
55 /// let invalid_name_dot = Ident::new(".");
56 /// assert!(invalid_name_dot.is_err());
57 ///
58 /// let invalid_name_dot_dot = Ident::new("..");
59 /// assert!(invalid_name_dot_dot.is_err());
60 /// ```
61 ///
62 pub fn new<S>(s: S) -> Result<Self, IdentError>
63 where
64 S: TryInto<IdentString>,
65 <S as TryInto<IdentString>>::Error: fmt::Display,
66 {
67 let s = s
68 .try_into()
69 .map_err(|e| IdentError::TooLong(e.to_string()))?;
70
71 if &*s == "." || &*s == ".." {
72 return Err(IdentError::Invalid(s.into_inner()));
73 }
74
75 Ok(Ident(s.into_inner()))
76 }
77
78 /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
79 ///
80 /// # Examples
81 ///
82 /// ```
83 /// use mz_sql_parser::ast::Ident;
84 ///
85 /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
86 /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
87 /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
88 ///
89 /// let id = Ident::new_lossy(too_long);
90 ///
91 /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
92 /// // `🔴` characters.
93 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
94 /// ```
95 pub fn new_lossy<S: Into<String>>(value: S) -> Self {
96 let s: String = value.into();
97 if s.len() <= Self::MAX_LENGTH {
98 return Ident(s);
99 }
100
101 let mut byte_length = 0;
102 let s_truncated = s
103 .chars()
104 .take_while(|c| {
105 byte_length += c.len_utf8();
106 byte_length <= Self::MAX_LENGTH
107 })
108 .collect();
109
110 Ident(s_truncated)
111 }
112
113 /// Create a new [`Ident`] _without checking any of our invariants_.
114 ///
115 /// NOTE: Generally you __should not use this function__! If you're trying to create an
116 /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
117 /// use cases, see [`Ident::new`] which correctly checks our invariants.
118 ///
119 /// [`ident!`]: [`mz_sql_parser::ident`]
120 pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
121 let s = value.into();
122 mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
123
124 Ident(s)
125 }
126
127 /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
128 ///
129 /// # Examples
130 ///
131 /// ```
132 /// use mz_sql_parser::ast::{Ident, IdentError};
133 ///
134 /// let good_id =
135 /// Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
136 /// assert_eq!(good_id.as_str(), "hello_world");
137 ///
138 /// // Return invalid once.
139 /// let mut attempts = 0;
140 /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
141 /// if attempts == 0 {
142 /// attempts += 1;
143 /// Ok::<_, IdentError>(false)
144 /// } else {
145 /// Ok(true)
146 /// }
147 /// })
148 /// .unwrap();
149 ///
150 /// // We "hello_world" was invalid, so we appended "_1".
151 /// assert_eq!(one_failure.as_str(), "hello_world_1");
152 /// ```
153 pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
154 where
155 P: Into<String>,
156 S: Into<String>,
157 E: From<IdentError>,
158 F: FnMut(&Ident) -> Result<bool, E>,
159 {
160 const MAX_ATTEMPTS: usize = 1000;
161
162 let prefix: String = prefix.into();
163 let suffix: String = suffix.into();
164
165 // First just append the prefix and suffix.
166 let mut candidate = Ident(prefix.clone());
167 candidate.append_lossy(suffix.clone());
168 if is_valid(&candidate)? {
169 return Ok(candidate);
170 }
171
172 // Otherwise, append a number to the back.
173 for i in 1..MAX_ATTEMPTS {
174 let mut candidate = Ident(prefix.clone());
175 candidate.append_lossy(format!("{suffix}_{i}"));
176
177 if is_valid(&candidate)? {
178 return Ok(candidate);
179 }
180 }
181
182 // Couldn't find any valid name!
183 Err(E::from(IdentError::FailedToGenerate {
184 prefix,
185 suffix,
186 attempts: MAX_ATTEMPTS,
187 }))
188 }
189
190 /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
191 ///
192 /// Note: We soft-assert that the provided `suffix` is not too long, if it is, we'll
193 /// truncate it.
194 ///
195 /// # Examples
196 ///
197 /// ```
198 /// use mz_sql_parser::{
199 /// ident,
200 /// ast::Ident,
201 /// };
202 ///
203 /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
204 /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
205 ///
206 /// // We truncated the original ident, removing all '🔵' chars.
207 /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
208 /// ```
209 ///
210 /// ### Too long suffix
211 /// If the provided suffix is too long, we'll also truncate that.
212 ///
213 /// ```
214 /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
215 /// use mz_sql_parser::{
216 /// ident,
217 /// ast::Ident,
218 /// };
219 ///
220 /// let mut stem = ident!("hello___world");
221 ///
222 /// let too_long_suffix = "\
223 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
224 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
225 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
226 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
227 /// ";
228 ///
229 /// stem.append_lossy(too_long_suffix);
230 ///
231 /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
232 /// let result = "hello___wor\
233 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236 /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
237 /// ";
238 /// assert_eq!(stem.as_str(), result);
239 /// ```
240 pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
241 // Make sure our suffix at least leaves a bit of room for the original ident.
242 const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
243
244 let mut suffix: String = suffix.into();
245 mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
246
247 // Truncate the suffix as necessary.
248 if suffix.len() > MAX_SUFFIX_LENGTH {
249 let mut byte_length = 0;
250 suffix = suffix
251 .chars()
252 .take_while(|c| {
253 byte_length += c.len_utf8();
254 byte_length <= MAX_SUFFIX_LENGTH
255 })
256 .collect();
257 }
258
259 // Truncate ourselves as necessary.
260 let available_length = Ident::MAX_LENGTH - suffix.len();
261 if self.0.len() > available_length {
262 let mut byte_length = 0;
263 self.0 = self
264 .0
265 .chars()
266 .take_while(|c| {
267 byte_length += c.len_utf8();
268 byte_length <= available_length
269 })
270 .collect();
271 }
272
273 // Append the suffix.
274 self.0.push_str(&suffix);
275 }
276
277 /// An identifier can be printed in bare mode if
278 /// * it matches the regex [a-z_][a-z0-9_]* and
279 /// * it is not a "reserved keyword."
280 pub fn can_be_printed_bare(&self) -> bool {
281 let mut chars = self.0.chars();
282 chars
283 .next()
284 .map(|ch| matches!(ch, 'a'..='z' | '_'))
285 .unwrap_or(false)
286 && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
287 && !self
288 .as_keyword()
289 .map(Keyword::is_sometimes_reserved)
290 .unwrap_or(false)
291 }
292
293 pub fn as_str(&self) -> &str {
294 &self.0
295 }
296
297 pub fn as_keyword(&self) -> Option<Keyword> {
298 self.0.parse().ok()
299 }
300
301 pub fn into_string(self) -> String {
302 self.0
303 }
304}
305
306/// More-or-less a direct translation of the Postgres function for doing the same thing:
307///
308/// <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
309///
310/// Quotation is forced when printing in Stable mode.
311impl AstDisplay for Ident {
312 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
313 if self.can_be_printed_bare() && !f.stable() {
314 f.write_str(&self.0);
315 } else {
316 f.write_str("\"");
317 for ch in self.0.chars() {
318 // Double up on double-quotes.
319 if ch == '"' {
320 f.write_str("\"");
321 }
322 f.write_str(ch);
323 }
324 f.write_str("\"");
325 }
326 }
327}
328impl_display!(Ident);
329
/// Errors produced while creating or generating an [`Ident`].
#[derive(Clone, Debug, thiserror::Error)]
pub enum IdentError {
    /// The value was rejected as too long. [`Ident::new`] fills the payload with the string
    /// form of the conversion error; the message reports that payload's length and quoted text.
    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
    TooLong(String),
    /// [`Ident::try_generate_name`] ran out of candidates: none of the `attempts` names built
    /// from `prefix` and `suffix` was accepted.
    #[error(
        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
    )]
    FailedToGenerate {
        prefix: String,
        suffix: String,
        attempts: usize,
    },

    /// The value is not allowed as an identifier. [`Ident::new`] returns this for `"."` and
    /// `".."`; the payload is the rejected value.
    #[error("invalid identifier: {}", .0.quoted())]
    Invalid(String),
}
346
/// A name of a table, view, custom type, etc. that lives in a schema, possibly multi-part, i.e. db.schema.obj
///
/// The parts are stored in order, one [`Ident`] each, and are displayed joined by `.`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct UnresolvedItemName(pub Vec<Ident>);
350
351pub enum CatalogName {
352 ItemName(Vec<Ident>),
353 FuncName(Vec<Ident>),
354}
355
356impl UnresolvedItemName {
357 /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
358 /// "unqualified".
359 pub fn unqualified(ident: Ident) -> UnresolvedItemName {
360 UnresolvedItemName(vec![ident])
361 }
362
363 /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
364 ///
365 /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
366 /// than 3.
367 pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
368 assert!(n.len() <= 3 && n.len() > 0);
369 UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
370 }
371}
372
373impl AstDisplay for UnresolvedItemName {
374 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
375 display::separated(&self.0, ".").fmt(f);
376 }
377}
378impl_display!(UnresolvedItemName);
379
380impl AstDisplay for &UnresolvedItemName {
381 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
382 display::separated(&self.0, ".").fmt(f);
383 }
384}
385
386/// A name of a schema
387#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
388pub struct UnresolvedSchemaName(pub Vec<Ident>);
389
390impl AstDisplay for UnresolvedSchemaName {
391 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
392 display::separated(&self.0, ".").fmt(f);
393 }
394}
395impl_display!(UnresolvedSchemaName);
396
397/// A name of a database
398#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
399pub struct UnresolvedDatabaseName(pub Ident);
400
401impl AstDisplay for UnresolvedDatabaseName {
402 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
403 f.write_node(&self.0);
404 }
405}
406impl_display!(UnresolvedDatabaseName);
407
408// The name of an item not yet created during name resolution, which should be
409// resolveable as an item name later.
410#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
411pub enum DeferredItemName<T: AstInfo> {
412 Named(T::ItemName),
413 Deferred(UnresolvedItemName),
414}
415
416impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
417 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
418 match self {
419 DeferredItemName::Named(o) => f.write_node(o),
420 DeferredItemName::Deferred(o) => f.write_node(o),
421 }
422 }
423}
424impl_display_t!(DeferredItemName);
425
426#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
427pub enum UnresolvedObjectName {
428 Cluster(Ident),
429 ClusterReplica(QualifiedReplica),
430 Database(UnresolvedDatabaseName),
431 Schema(UnresolvedSchemaName),
432 Role(Ident),
433 Item(UnresolvedItemName),
434 NetworkPolicy(Ident),
435}
436
437impl AstDisplay for UnresolvedObjectName {
438 fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
439 match self {
440 UnresolvedObjectName::Cluster(n) => f.write_node(n),
441 UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
442 UnresolvedObjectName::Database(n) => f.write_node(n),
443 UnresolvedObjectName::Schema(n) => f.write_node(n),
444 UnresolvedObjectName::Role(n) => f.write_node(n),
445 UnresolvedObjectName::Item(n) => f.write_node(n),
446 UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
447 }
448 }
449}
450impl_display!(UnresolvedObjectName);