Skip to main content

mz_sql_parser/ast/defs/
name.rs

1// Copyright 2018 sqlparser-rs contributors. All rights reserved.
2// Copyright Materialize, Inc. and contributors. All rights reserved.
3//
4// This file is derived from the sqlparser-rs project, available at
5// https://github.com/andygrove/sqlparser-rs. It was incorporated
6// directly into Materialize on December 21, 2019.
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License in the LICENSE file at the
11// root of this repository, or online at
12//
13//     http://www.apache.org/licenses/LICENSE-2.0
14//
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20
21use mz_ore::str::StrExt;
22use mz_sql_lexer::keywords::Keyword;
23use mz_sql_lexer::lexer::{IdentString, MAX_IDENTIFIER_LENGTH};
24use serde::{Deserialize, Serialize};
25use std::fmt;
26
27use crate::ast::display::{self, AstDisplay, AstFormatter};
28use crate::ast::{AstInfo, QualifiedReplica};
29
30/// An identifier.
31#[derive(
32    Debug,
33    Clone,
34    PartialEq,
35    Eq,
36    Hash,
37    PartialOrd,
38    Ord,
39    Serialize,
40    Deserialize
41)]
42pub struct Ident(pub(crate) String);
43
44impl Ident {
45    /// Maximum length of an identifier in Materialize.
46    pub const MAX_LENGTH: usize = MAX_IDENTIFIER_LENGTH;
47
48    /// Create a new [`Ident`] with the given value, checking our invariants.
49    ///
50    /// # Examples
51    ///
52    /// ```
53    /// use mz_sql_parser::ast::Ident;
54    ///
55    /// let id = Ident::new("hello_world").unwrap();
56    /// assert_eq!(id.as_str(), "hello_world");
57    ///
58    /// let too_long = "I am a very long identifier that is more than 255 bytes long which is the max length for idents.\
59    /// 😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈😊😁😅😂😬🍻😮‍💨😮🗽🛰️🌈";
60    /// assert_eq!(too_long.len(), 258);
61    ///
62    /// let too_long_id = Ident::new(too_long);
63    /// assert!(too_long_id.is_err());
64    ///
65    /// let invalid_name_dot = Ident::new(".");
66    /// assert!(invalid_name_dot.is_err());
67    ///
68    /// let invalid_name_dot_dot = Ident::new("..");
69    /// assert!(invalid_name_dot_dot.is_err());
70    /// ```
71    ///
72    pub fn new<S>(s: S) -> Result<Self, IdentError>
73    where
74        S: TryInto<IdentString>,
75        <S as TryInto<IdentString>>::Error: fmt::Display,
76    {
77        let s = s
78            .try_into()
79            .map_err(|e| IdentError::TooLong(e.to_string()))?;
80
81        if &*s == "." || &*s == ".." {
82            return Err(IdentError::Invalid(s.into_inner()));
83        }
84
85        Ok(Ident(s.into_inner()))
86    }
87
88    /// Create a new [`Ident`] modifying the given value as necessary to meet our invariants.
89    ///
90    /// # Examples
91    ///
92    /// ```
93    /// use mz_sql_parser::ast::Ident;
94    ///
95    /// let too_long = "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
96    /// 🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵\
97    /// 🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴";
98    ///
99    /// let id = Ident::new_lossy(too_long);
100    ///
101    /// // `new_lossy` will truncate the provided string, since it's too long. Note the missing
102    /// // `🔴` characters.
103    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
104    /// ```
105    pub fn new_lossy<S: Into<String>>(value: S) -> Self {
106        let s: String = value.into();
107        if s.len() <= Self::MAX_LENGTH {
108            return Ident(s);
109        }
110
111        let mut byte_length = 0;
112        let s_truncated = s
113            .chars()
114            .take_while(|c| {
115                byte_length += c.len_utf8();
116                byte_length <= Self::MAX_LENGTH
117            })
118            .collect();
119
120        Ident(s_truncated)
121    }
122
123    /// Create a new [`Ident`] _without checking any of our invariants_.
124    ///
125    /// NOTE: Generally you __should not use this function__! If you're trying to create an
126    /// [`Ident`] from a `&'static str` you know is valid, use the [`ident!`] macro. For all other
127    /// use cases, see [`Ident::new`] which correctly checks our invariants.
128    ///
    /// [`ident!`]: crate::ident!
130    pub fn new_unchecked<S: Into<String>>(value: S) -> Self {
131        let s = value.into();
132        mz_ore::soft_assert_no_log!(s.len() <= Self::MAX_LENGTH);
133
134        Ident(s)
135    }
136
137    /// Generate a valid [`Ident`] with the provided `prefix` and `suffix`.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use mz_sql_parser::ast::{Ident, IdentError};
143    ///
144    /// let good_id =
145    ///   Ident::try_generate_name("hello", "_world", |_| Ok::<_, IdentError>(true)).unwrap();
146    /// assert_eq!(good_id.as_str(), "hello_world");
147    ///
148    /// // Return invalid once.
149    /// let mut attempts = 0;
150    /// let one_failure = Ident::try_generate_name("hello", "_world", |_candidate| {
151    ///     if attempts == 0 {
152    ///         attempts += 1;
153    ///         Ok::<_, IdentError>(false)
154    ///     } else {
155    ///         Ok(true)
156    ///     }
157    /// })
158    /// .unwrap();
159    ///
    /// // Since "hello_world" was invalid, we appended "_1".
161    /// assert_eq!(one_failure.as_str(), "hello_world_1");
162    /// ```
163    pub fn try_generate_name<P, S, F, E>(prefix: P, suffix: S, mut is_valid: F) -> Result<Self, E>
164    where
165        P: Into<String>,
166        S: Into<String>,
167        E: From<IdentError>,
168        F: FnMut(&Ident) -> Result<bool, E>,
169    {
170        const MAX_ATTEMPTS: usize = 1000;
171
172        let prefix: String = prefix.into();
173        let suffix: String = suffix.into();
174
175        // First just append the prefix and suffix.
176        let mut candidate = Ident(prefix.clone());
177        candidate.append_lossy(suffix.clone());
178        if is_valid(&candidate)? {
179            return Ok(candidate);
180        }
181
182        // Otherwise, append a number to the back.
183        for i in 1..MAX_ATTEMPTS {
184            let mut candidate = Ident(prefix.clone());
185            candidate.append_lossy(format!("{suffix}_{i}"));
186
187            if is_valid(&candidate)? {
188                return Ok(candidate);
189            }
190        }
191
192        // Couldn't find any valid name!
193        Err(E::from(IdentError::FailedToGenerate {
194            prefix,
195            suffix,
196            attempts: MAX_ATTEMPTS,
197        }))
198    }
199
200    /// Append the provided `suffix`, truncating `self` as necessary to satisfy our invariants.
201    ///
    /// Note: We soft-assert that the provided `suffix` is not too long; if it is, we'll
    /// truncate it.
204    ///
205    /// # Examples
206    ///
207    /// ```
208    /// use mz_sql_parser::{
209    ///     ident,
210    ///     ast::Ident,
211    /// };
212    ///
213    /// let mut id = ident!("🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵");
214    /// id.append_lossy("🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
215    ///
216    /// // We truncated the original ident, removing all '🔵' chars.
217    /// assert_eq!(id.as_str(), "🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴🔴");
218    /// ```
219    ///
220    /// ### Too long suffix
221    /// If the provided suffix is too long, we'll also truncate that.
222    ///
223    /// ```
224    /// # mz_ore::assert::SOFT_ASSERTIONS.store(false, std::sync::atomic::Ordering::Relaxed);
225    /// use mz_sql_parser::{
226    ///     ident,
227    ///     ast::Ident,
228    /// };
229    ///
230    /// let mut stem = ident!("hello___world");
231    ///
232    /// let too_long_suffix = "\
233    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
234    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
235    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
236    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🔵\
237    /// ";
238    ///
239    /// stem.append_lossy(too_long_suffix);
240    ///
241    /// // Notice the "hello___world" stem got truncated, as did the "🔵🔵" characters from the suffix.
242    /// let result = "hello___wor\
243    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
244    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
245    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
246    /// 🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢\
247    /// ";
248    /// assert_eq!(stem.as_str(), result);
249    /// ```
250    pub fn append_lossy<S: Into<String>>(&mut self, suffix: S) {
251        // Make sure our suffix at least leaves a bit of room for the original ident.
252        const MAX_SUFFIX_LENGTH: usize = Ident::MAX_LENGTH - 8;
253
254        let mut suffix: String = suffix.into();
255        mz_ore::soft_assert_or_log!(suffix.len() <= MAX_SUFFIX_LENGTH, "suffix too long");
256
257        // Truncate the suffix as necessary.
258        if suffix.len() > MAX_SUFFIX_LENGTH {
259            let mut byte_length = 0;
260            suffix = suffix
261                .chars()
262                .take_while(|c| {
263                    byte_length += c.len_utf8();
264                    byte_length <= MAX_SUFFIX_LENGTH
265                })
266                .collect();
267        }
268
269        // Truncate ourselves as necessary.
270        let available_length = Ident::MAX_LENGTH - suffix.len();
271        if self.0.len() > available_length {
272            let mut byte_length = 0;
273            self.0 = self
274                .0
275                .chars()
276                .take_while(|c| {
277                    byte_length += c.len_utf8();
278                    byte_length <= available_length
279                })
280                .collect();
281        }
282
283        // Append the suffix.
284        self.0.push_str(&suffix);
285    }
286
287    /// An identifier can be printed in bare mode if
288    ///  * it matches the regex `[a-z_][a-z0-9_]*` and
289    ///  * it is not a "reserved keyword."
290    pub fn can_be_printed_bare(&self) -> bool {
291        let mut chars = self.0.chars();
292        chars
293            .next()
294            .map(|ch| matches!(ch, 'a'..='z' | '_'))
295            .unwrap_or(false)
296            && chars.all(|ch| matches!(ch, 'a'..='z' | '0'..='9' | '_'))
297            && !self
298                .as_keyword()
299                .map(Keyword::is_sometimes_reserved)
300                .unwrap_or(false)
301    }
302
303    pub fn as_str(&self) -> &str {
304        &self.0
305    }
306
307    pub fn as_keyword(&self) -> Option<Keyword> {
308        self.0.parse().ok()
309    }
310
311    pub fn into_string(self) -> String {
312        self.0
313    }
314}
315
316/// More-or-less a direct translation of the Postgres function for doing the same thing:
317///
318///   <https://github.com/postgres/postgres/blob/master/src/backend/utils/adt/ruleutils.c#L10730-L10812>
319///
320/// Quotation is forced when printing in Stable mode.
321impl AstDisplay for Ident {
322    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
323        if self.can_be_printed_bare() && !f.stable() {
324            f.write_str(&self.0);
325        } else {
326            f.write_str("\"");
327            for ch in self.0.chars() {
328                // Double up on double-quotes.
329                if ch == '"' {
330                    f.write_str("\"");
331                }
332                f.write_str(ch);
333            }
334            f.write_str("\"");
335        }
336    }
337}
338impl_display!(Ident);
339
340#[derive(Clone, Debug, thiserror::Error)]
341pub enum IdentError {
342    #[error("identifier too long (len: {}, max: {}, value: {})", .0.len(), Ident::MAX_LENGTH, .0.quoted())]
343    TooLong(String),
344    #[error(
345        "failed to generate identifier with prefix '{prefix}' and suffix '{suffix}' after {attempts} attempts"
346    )]
347    FailedToGenerate {
348        prefix: String,
349        suffix: String,
350        attempts: usize,
351    },
352
353    #[error("invalid identifier: {}", .0.quoted())]
354    Invalid(String),
355}
356
357/// A name of a table, view, custom type, etc. that lives in a schema, possibly multi-part, i.e. db.schema.obj
358#[derive(
359    Debug,
360    Clone,
361    PartialEq,
362    Eq,
363    Hash,
364    PartialOrd,
365    Ord,
366    Serialize,
367    Deserialize
368)]
369pub struct UnresolvedItemName(pub Vec<Ident>);
370
371pub enum CatalogName {
372    ItemName(Vec<Ident>),
373    FuncName(Vec<Ident>),
374}
375
376impl UnresolvedItemName {
377    /// Creates an `ItemName` with a single [`Ident`], i.e. it appears as
378    /// "unqualified".
379    pub fn unqualified(ident: Ident) -> UnresolvedItemName {
380        UnresolvedItemName(vec![ident])
381    }
382
383    /// Creates an `ItemName` with an [`Ident`] for each element of `n`.
384    ///
385    /// Panics if passed an in ineligible `&[&str]` whose length is 0 or greater
386    /// than 3.
387    pub fn qualified(n: &[Ident]) -> UnresolvedItemName {
388        assert!(n.len() <= 3 && n.len() > 0);
389        UnresolvedItemName(n.iter().cloned().collect::<Vec<_>>())
390    }
391}
392
393impl AstDisplay for UnresolvedItemName {
394    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
395        display::separated(&self.0, ".").fmt(f);
396    }
397}
398impl_display!(UnresolvedItemName);
399
400impl AstDisplay for &UnresolvedItemName {
401    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
402        display::separated(&self.0, ".").fmt(f);
403    }
404}
405
406/// A name of a schema
407#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
408pub struct UnresolvedSchemaName(pub Vec<Ident>);
409
410impl AstDisplay for UnresolvedSchemaName {
411    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
412        display::separated(&self.0, ".").fmt(f);
413    }
414}
415impl_display!(UnresolvedSchemaName);
416
417/// A name of a database
418#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
419pub struct UnresolvedDatabaseName(pub Ident);
420
421impl AstDisplay for UnresolvedDatabaseName {
422    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
423        f.write_node(&self.0);
424    }
425}
426impl_display!(UnresolvedDatabaseName);
427
428// The name of an item not yet created during name resolution, which should be
429// resolveable as an item name later.
430#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
431pub enum DeferredItemName<T: AstInfo> {
432    Named(T::ItemName),
433    Deferred(UnresolvedItemName),
434}
435
436impl<T: AstInfo> AstDisplay for DeferredItemName<T> {
437    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
438        match self {
439            DeferredItemName::Named(o) => f.write_node(o),
440            DeferredItemName::Deferred(o) => f.write_node(o),
441        }
442    }
443}
444impl_display_t!(DeferredItemName);
445
446#[derive(Debug, PartialEq, Eq, Hash, Clone, PartialOrd, Ord)]
447pub enum UnresolvedObjectName {
448    Cluster(Ident),
449    ClusterReplica(QualifiedReplica),
450    Database(UnresolvedDatabaseName),
451    Schema(UnresolvedSchemaName),
452    Role(Ident),
453    Item(UnresolvedItemName),
454    NetworkPolicy(Ident),
455}
456
457impl AstDisplay for UnresolvedObjectName {
458    fn fmt<W: fmt::Write>(&self, f: &mut AstFormatter<W>) {
459        match self {
460            UnresolvedObjectName::Cluster(n) => f.write_node(n),
461            UnresolvedObjectName::ClusterReplica(n) => f.write_node(n),
462            UnresolvedObjectName::Database(n) => f.write_node(n),
463            UnresolvedObjectName::Schema(n) => f.write_node(n),
464            UnresolvedObjectName::Role(n) => f.write_node(n),
465            UnresolvedObjectName::Item(n) => f.write_node(n),
466            UnresolvedObjectName::NetworkPolicy(n) => f.write_node(n),
467        }
468    }
469}
470impl_display!(UnresolvedObjectName);