mz_sql_parser/ast/defs/
name.rs

1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13//     http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::Keyword;
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
/// An identifier.
///
/// A newtype over the identifier's text. The inner `String` is only visible within this crate;
/// values are built through [`Ident::new`] (validating), [`Ident::new_lossy`] (truncating) or
/// [`Ident::new_unchecked`] (no checks), and read back with [`Ident::as_str`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Ident(pub(crate) String);
33
34impl Ident {
35    /// Maximum length of an identifier in Materialize.
36    pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
37
38    /// Create a new [`Ident`] with the given value, checking our invariants.
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// use mz_sql_parser::ast::Ident;
44    ///
45    /// let id = Ident::new("hello_world").unwrap();
46    /// assert_eq!(id.as_str(), "hello_world");
47    ///
48    /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
49    /// 😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈";
50    /// assert_eq!(too_long.len(), 258);
51    ///
52    /// let too_long_id = Ident::new(too_long);
53    /// assert!(too_long_id.is_err());
54    ///
55    /// let invalid_name_dot = Ident::new(".");
56    /// assert!(invalid_name_dot.is_err());
57    ///
58    /// let invalid_name_dot_dot = Ident::new("..");
59    /// assert!(invalid_name_dot_dot.is_err());
60    /// ```
61    ///
62    pub fn new<S>(s: S) -> Result<Self, IdentError>
63    where
64        S: TryInto<IdentString>,
65        <S as TryInto<IdentString>>::Error: fmt::Display,
66    {
67        let s = s
68            .try_into()
69            .map_err(|e| IdentError::TooLong(e.to_string()))?;
70
71        if &*s == "." || &*s == ".." {
72            return Err(IdentError::Invalid(s.into_inner()));
73        }
74
75        Ok(Ident(s.into_inner()))
76    }
77
78    /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
79    ///
80    /// # Examples
81    ///
82    /// ```
83    /// use mz_sql_parser::ast::Ident;
84    ///
85    /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
86    /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
87    /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
88    ///
89    /// let id = Ident::new_lossy(too_long);
90    ///
91    /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
92    /// // `🔴` characters.
93    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
94    /// ```
95    pub fn new_lossy<S: Into<String>>(value: S) -> Self {
96        let s: String = value.into();
97        if s.len() <= Self::MAX_LENGTH {
98            return Ident(s);
99        }
100
101        let mut byte_length = 0;
102        let s_truncated = s
103            .chars()
104            .take_while(|c| {
105                byte_length += c.len_utf8();
106                byte_length <= Self::MAX_LENGTH
107            })
108            .collect();
109
110        Ident(s_truncated)
111    }
112
113    /// Create a new [`Ident`] _without checking any of our invariants_.
114    ///
115    /// NOTE: Generally you __should not use this function__! If you're trying to create an
116    /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
117    /// use cases, see [`Ident::new`] which correctly checks our invariants.
118    ///
119    /// [`ident!`]: [`mz_sql_parser::ident`]
120    pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
121        let s = value.into();
122        mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
123
124        Ident(s)
125    }
126
127    /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
128    ///
129    /// # Examples
130    ///
131    /// ```
132    /// use mz_sql_parser::ast::{Ident, IdentError};
133    ///
134    /// let good_id =
135    ///   Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
136    /// assert_eq!(good_id.as_str(), "hello_world");
137    ///
138    /// // Return invalid once.
139    /// let mut attempts = 0;
140    /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
141    ///     if attempts == 0 {
142    ///         attempts += 1;
143    ///         Ok::<_, IdentError>(false)
144    ///     } else {
145    ///         Ok(true)
146    ///     }
147    /// })
148    /// .unwrap();
149    ///
150    /// // We "hello_world" was invalid, so we appended "_1".
151    /// assert_eq!(one_failure.as_str(), "hello_world_1");
152    /// ```
153    pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
154    where
155        P: Into<String>,
156        S: Into<String>,
157        E: From<IdentError>,
158        F: FnMut(&Ident) -> Result<bool, E>,
159    {
160        const MAX_ATTEMPTS: usize = 1000;
161
162        let prefix: String = prefix.into();
163        let suffix: String = suffix.into();
164
165        // First just append the prefix and suffix.
166        let mut candidate = Ident(prefix.clone());
167        candidate.append_lossy(suffix.clone());
168        if is_valid(&candidate)? {
169            return Ok(candidate);
170        }
171
172        // Otherwise, append a number to the back.
173        for i in 1..MAX_ATTEMPTS {
174            let mut candidate = Ident(prefix.clone());
175            candidate.append_lossy(format!("{suffix}_{i}"));
176
177            if is_valid(&candidate)? {
178                return Ok(candidate);
179            }
180        }
181
182        // Couldn't find any valid name!
183        Err(E::from(IdentError::FailedToGenerate {
184            prefix,
185            suffix,
186            attempts: MAX_ATTEMPTS,
187        }))
188    }
189
190    /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
191    ///
192    /// Note: We soft-assert that the provided `suffix` is not too long, if it is, we'll
193    /// truncate it.
194    ///
195    /// # Examples
196    ///
197    /// ```
198    /// use mz_sql_parser::{
199    ///     ident,
200    ///     ast::Ident,
201    /// };
202    ///
203    /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
204    /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
205    ///
206    /// // We truncated the original ident, removing all '🔵' chars.
207    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
208    /// ```
209    ///
210    /// ### Too long suffix
211    /// If the provided suffix is too long, we'll also truncate that.
212    ///
213    /// ```
214    /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
215    /// use mz_sql_parser::{
216    ///     ident,
217    ///     ast::Ident,
218    /// };
219    ///
220    /// let mut stem = ident!("hello___world");
221    ///
222    /// let too_long_suffix = "\
223    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
224    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
225    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
226    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
227    /// ";
228    ///
229    /// stem.append_lossy(too_long_suffix);
230    ///
231    /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
232    /// let result = "hello___wor\
233    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
237    /// ";
238    /// assert_eq!(stem.as_str(), result);
239    /// ```
240    pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
241        // Make sure our suffix at least leaves a bit of room for the original ident.
242        const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
243
244        let mut suffix: String = suffix.into();
245        mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
246
247        // Truncate the suffix as necessary.
248        if suffix.len() > MAX_SUFFIX_LENGTH {
249            let mut byte_length = 0;
250            suffix = suffix
251                .chars()
252                .take_while(|c| {
253                    byte_length += c.len_utf8();
254                    byte_length <= MAX_SUFFIX_LENGTH
255                })
256                .collect();
257        }
258
259        // Truncate ourselves as necessary.
260        let available_length = Ident::MAX_LENGTH - suffix.len();
261        if self.0.len() > available_length {
262            let mut byte_length = 0;
263            self.0 = self
264                .0
265                .chars()
266                .take_while(|c| {
267                    byte_length += c.len_utf8();
268                    byte_length <= available_length
269                })
270                .collect();
271        }
272
273        // Append the suffix.
274        self.0.push_str(&suffix);
275    }
276
277    /// An identifier can be printed in bare mode if
278    ///  * it matches the regex [a-z_][a-z0-9_]* and
279    ///  * it is not a "reserved keyword."
280    pub fn can_be_printed_bare(&self) -> bool {
281        let mut chars = self.0.chars();
282        chars
283            .next()
284            .map(|ch| matches!(ch, 'a'..='z' | '_'))
285            .unwrap_or(false)
286            && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
287            && !self
288                .as_keyword()
289                .map(Keyword::is_sometimes_reserved)
290                .unwrap_or(false)
291    }
292
293    pub fn as_str(&self) -> &str {
294        &self.0
295    }
296
297    pub fn as_keyword(&self) -> Option<Keyword> {
298        self.0.parse().ok()
299    }
300
301    pub fn into_string(self) -> String {
302        self.0
303    }
304}
305
306/// More-or-less a direct translation of the Postgres function for doing the same thing:
307///
308///   <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
309///
310/// Quotation is forced when printing in Stable mode.
311impl AstDisplay for Ident {
312    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
313        if self.can_be_printed_bare() && !f.stable() {
314            f.write_str(&self.0);
315        } else {
316            f.write_str("\"");
317            for ch in self.0.chars() {
318                // Double up on double-quotes.
319                if ch == '"' {
320                    f.write_str("\"");
321                }
322                f.write_str(ch);
323            }
324            f.write_str("\"");
325        }
326    }
327}
328impl_display!(Ident);
329
/// Errors produced while constructing or generating an [`Ident`].
#[derive(Clone, Debug, thiserror::Error)]
pub enum IdentError {
    /// Returned by [`Ident::new`] when the input cannot be converted into an `IdentString`.
    ///
    /// The payload is the conversion error rendered as a string; the message reports its length
    /// and quoted value. NOTE(review): presumably this is the rejected identifier text itself --
    /// confirm against the `TryInto<IdentString>` implementations.
    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
    TooLong(String),
    /// Returned by [`Ident::try_generate_name`] when none of the candidates it tried was accepted
    /// by the caller's validity check. `prefix` and `suffix` are the values passed to that
    /// function and `attempts` is its attempt limit.
    #[error(
        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
    )]
    FailedToGenerate {
        prefix: String,
        suffix: String,
        attempts: usize,
    },

    /// Returned by [`Ident::new`] for the values `.` and `..`; carries the rejected text.
    #[error("invalid identifier: {}", .0.quoted())]
    Invalid(String),
}
346
/// A name of a table, view, custom type, etc. that lives in a schema, possibly multi-part, e.g.
/// `db.schema.obj`.
///
/// The parts are stored in the order they are written and are displayed joined by `.`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct UnresolvedItemName(pub Vec<Ident>);
350
/// A multi-part name tagged with the kind of catalog entry it refers to.
///
/// NOTE(review): this type is not used in the visible part of this file; the variant meanings
/// below are read off the variant names and should be confirmed against the callers.
pub enum CatalogName {
    /// The parts of a name that refers to an item.
    ItemName(Vec<Ident>),
    /// The parts of a name that refers to a function.
    FuncName(Vec<Ident>),
}
355
356impl UnresolvedItemName {
357    /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
358    /// "unqualified".
359    pub fn unqualified(ident: Ident) -> UnresolvedItemName {
360        UnresolvedItemName(vec![ident])
361    }
362
363    /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
364    ///
365    /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
366    /// than 3.
367    pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
368        assert!(n.len() <= 3 && n.len() > 0);
369        UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
370    }
371}
372
373impl AstDisplay for UnresolvedItemName {
374    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
375        display::separated(&self.0, ".").fmt(f);
376    }
377}
378impl_display!(UnresolvedItemName);
379
380impl AstDisplay for &UnresolvedItemName {
381    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
382        display::separated(&self.0, ".").fmt(f);
383    }
384}
385
/// A name of a schema, stored as its individual parts.
///
/// The parts are displayed joined by `.`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedSchemaName(pub Vec<Ident>);
389
390impl AstDisplay for UnresolvedSchemaName {
391    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
392        display::separated(&self.0, ".").fmt(f);
393    }
394}
395impl_display!(UnresolvedSchemaName);
396
/// A name of a database: a single [`Ident`], displayed exactly as that identifier.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct UnresolvedDatabaseName(pub Ident);
400
401impl AstDisplay for UnresolvedDatabaseName {
402    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
403        f.write_node(&self.0);
404    }
405}
406impl_display!(UnresolvedDatabaseName);
407
/// The name of an item not yet created during name resolution, which should be
/// resolvable as an item name later.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum DeferredItemName<T: AstInfo> {
    /// A name already expressed as the `ItemName` type of the [`AstInfo`] in use.
    Named(T::ItemName),
    /// A name kept in its unresolved form.
    Deferred(UnresolvedItemName),
}
415
416impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
417    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
418        match self {
419            DeferredItemName::Named(o) => f.write_node(o),
420            DeferredItemName::Deferred(o) => f.write_node(o),
421        }
422    }
423}
424impl_display_t!(DeferredItemName);
425
/// An unresolved name of an object, tagged with the kind of object it names.
///
/// Each variant wraps the name type used for that kind of object; displaying the enum displays
/// the wrapped name.
#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
pub enum UnresolvedObjectName {
    /// A cluster, named by a single identifier.
    Cluster(Ident),
    /// A cluster replica, named by a [`QualifiedReplica`].
    ClusterReplica(QualifiedReplica),
    /// A database.
    Database(UnresolvedDatabaseName),
    /// A schema.
    Schema(UnresolvedSchemaName),
    /// A role, named by a single identifier.
    Role(Ident),
    /// An item, with a possibly multi-part name.
    Item(UnresolvedItemName),
    /// A network policy, named by a single identifier.
    NetworkPolicy(Ident),
}
436
437impl AstDisplay for UnresolvedObjectName {
438    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
439        match self {
440            UnresolvedObjectName::Cluster(n) => f.write_node(n),
441            UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
442            UnresolvedObjectName::Database(n) => f.write_node(n),
443            UnresolvedObjectName::Schema(n) => f.write_node(n),
444            UnresolvedObjectName::Role(n) => f.write_node(n),
445            UnresolvedObjectName::Item(n) => f.write_node(n),
446            UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
447        }
448    }
449}
450impl_display!(UnresolvedObjectName);