Skip to main content

mz_deploy/project/compiler/object_validation/
identifiers.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Identifier validation for database objects.
11//!
12//! Validates that identifiers (database, schema, object, and cluster names)
13//! follow Materialize's naming rules: lowercase letters, digits, underscores,
14//! and dollar signs, starting with a letter or underscore.
15
16use crate::project::ast::Statement;
17use crate::project::error::{ValidationError, ValidationErrorKind};
18use crate::project::ir::compiled::FullyQualifiedName;
19use std::path::PathBuf;
20
/// Category of name under validation.
///
/// Carried alongside the name purely so error messages can say which
/// component (database, schema, object, or cluster) was rejected.
#[derive(Debug, Clone, Copy)]
enum IdentifierKind {
    Database,
    Schema,
    Object,
    Cluster,
}

/// Renders the kind as the lowercase noun used at the start of error messages.
impl std::fmt::Display for IdentifierKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it once.
        let label = match *self {
            Self::Database => "database",
            Self::Schema => "schema",
            Self::Object => "object",
            Self::Cluster => "cluster",
        };
        f.write_str(label)
    }
}
40
41/// Validates an identifier follows naming rules.
42///
43/// # Rules
44///
45/// - **Start Character**: Must begin with a lowercase letter (a-z, including letters with
46///   diacritical marks and non-Latin letters) or an underscore (_).
47/// - **Subsequent Characters**: Can include lowercase letters, digits (0-9), underscores (_),
48///   or dollar signs ($).
49/// - **Case**: All characters must be lowercase.
50///
51/// # Arguments
52///
53/// * `name` - The identifier name to validate
54/// * `kind` - The type of identifier (for error messages)
55///
56/// # Returns
57///
58/// * `Ok(())` if the identifier is valid
59/// * `Err(String)` with a descriptive error message if invalid
60///
61/// # Examples
62///
63/// ```text
64/// Valid identifiers:
65///   users, _temp, my_table, café, 日本語, user123, price$
66///
67/// Invalid identifiers:
68///   Users (uppercase)
69///   123table (starts with digit)
70///   my-table (contains hyphen)
71///   MY_TABLE (uppercase)
72/// ```
73fn validate_identifier_format(name: &str, kind: IdentifierKind) -> Result<(), String> {
74    if name.is_empty() {
75        return Err(format!("{} name cannot be empty", kind));
76    }
77
78    let mut chars = name.chars().peekable();
79
80    // Check first character
81    if let Some(first) = chars.next() {
82        if first.is_uppercase() {
83            return Err(format!(
84                "{} name '{}' contains uppercase character '{}' at position 1. \
85                 Identifiers must be lowercase.",
86                kind, name, first
87            ));
88        }
89
90        if first.is_ascii_digit() {
91            return Err(format!(
92                "{} name '{}' starts with digit '{}'. \
93                 Identifiers must start with a letter or underscore.",
94                kind, name, first
95            ));
96        }
97
98        // First char must be a letter (including unicode letters) or underscore
99        if !first.is_alphabetic() && first != '_' {
100            return Err(format!(
101                "{} name '{}' starts with invalid character '{}'. \
102                 Identifiers must start with a letter or underscore.",
103                kind, name, first
104            ));
105        }
106    }
107
108    // Check subsequent characters
109    for (pos, ch) in chars.enumerate() {
110        let position = pos + 2; // +2 because we already consumed first char and positions are 1-indexed
111
112        if ch.is_uppercase() {
113            return Err(format!(
114                "{} name '{}' contains uppercase character '{}' at position {}. \
115                 Identifiers must be lowercase.",
116                kind, name, ch, position
117            ));
118        }
119
120        // Valid subsequent chars: letters (lowercase), digits, underscore, dollar sign
121        let is_valid = ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' || ch == '$';
122
123        if !is_valid {
124            return Err(format!(
125                "{} name '{}' contains invalid character '{}' at position {}. \
126                 Identifiers can only contain letters, digits, underscores, and dollar signs.",
127                kind, name, ch, position
128            ));
129        }
130    }
131
132    Ok(())
133}
134
135/// Validates all identifiers in a FullyQualifiedName (database, schema, object).
136///
137/// # Arguments
138///
139/// * `fqn` - The fully qualified name to validate
140/// * `main_offset` - Byte offset of the CREATE statement being validated
141/// * `errors` - Vector to collect validation errors
142pub(super) fn validate_fqn_identifiers(
143    fqn: &FullyQualifiedName,
144    main_offset: usize,
145    errors: &mut Vec<ValidationError>,
146) {
147    // Validate database name
148    if let Err(reason) = validate_identifier_format(fqn.database(), IdentifierKind::Database) {
149        errors.push(ValidationError::with_file_and_offset(
150            ValidationErrorKind::InvalidIdentifier {
151                name: fqn.database().to_string(),
152                reason,
153            },
154            fqn.path.clone(),
155            main_offset,
156        ));
157    }
158
159    // Validate schema name
160    if let Err(reason) = validate_identifier_format(fqn.schema(), IdentifierKind::Schema) {
161        errors.push(ValidationError::with_file_and_offset(
162            ValidationErrorKind::InvalidIdentifier {
163                name: fqn.schema().to_string(),
164                reason,
165            },
166            fqn.path.clone(),
167            main_offset,
168        ));
169    }
170
171    // Validate object name
172    if let Err(reason) = validate_identifier_format(fqn.object(), IdentifierKind::Object) {
173        errors.push(ValidationError::with_file_and_offset(
174            ValidationErrorKind::InvalidIdentifier {
175                name: fqn.object().to_string(),
176                reason,
177            },
178            fqn.path.clone(),
179            main_offset,
180        ));
181    }
182}
183
184/// Validates a cluster name follows naming rules.
185///
186/// # Arguments
187///
188/// * `cluster_name` - The cluster name to validate
189/// * `path` - The file path (for error reporting)
190///
191/// # Returns
192///
193/// * `Ok(())` if valid
194/// * `Err(ValidationError)` if invalid
195pub(super) fn validate_cluster_name(
196    cluster_name: &str,
197    path: &PathBuf,
198    byte_offset: usize,
199) -> Result<(), ValidationError> {
200    validate_identifier_format(cluster_name, IdentifierKind::Cluster).map_err(|reason| {
201        ValidationError::with_file_and_offset(
202            ValidationErrorKind::InvalidIdentifier {
203                name: cluster_name.to_string(),
204                reason,
205            },
206            path.clone(),
207            byte_offset,
208        )
209    })
210}
211
212/// Validates that the statement's identifier matches the expected file path structure.
213///
214/// Ensures that the object name in the CREATE statement matches the file name, and
215/// that any schema/database qualifiers match the directory structure.
216///
217/// # Validation Rules
218///
219/// - The object name must match the file name (without `.sql` extension)
220/// - If the statement includes a schema qualifier, it must match the parent directory name
221/// - If the statement includes a database qualifier, it must match the grandparent directory name
222///
223/// # Examples
224///
225/// Valid mappings:
226/// ```text
227/// materialize/public/users.sql  ->  CREATE TABLE users (...)
228/// materialize/public/users.sql  ->  CREATE TABLE public.users (...)
229/// materialize/public/users.sql  ->  CREATE TABLE materialize.public.users (...)
230/// ```
231///
232/// Invalid mappings:
233/// ```text
234/// materialize/public/users.sql  ->  CREATE TABLE customers (...)  X name mismatch
235/// materialize/public/users.sql  ->  CREATE TABLE private.users (...)  X schema mismatch
236/// materialize/public/users.sql  ->  CREATE TABLE other.public.users (...)  X database mismatch
237/// ```
238pub(super) fn validate_ident(
239    stmt: &Statement,
240    fqn: &FullyQualifiedName,
241    main_offset: usize,
242    errors: &mut Vec<ValidationError>,
243) {
244    let ident = stmt.ident();
245
246    // The object name in the statement must match the file name
247    if ident.object.as_str() != fqn.object() {
248        errors.push(ValidationError::with_file_and_offset(
249            ValidationErrorKind::ObjectNameMismatch {
250                declared: ident.object.to_string(),
251                expected: fqn.object().to_string(),
252            },
253            fqn.path.clone(),
254            main_offset,
255        ));
256    }
257
258    // If the statement includes a schema qualifier, validate it matches the path-derived schema
259    if let Some(ref stmt_schema) = ident.schema
260        && stmt_schema.as_str() != fqn.schema()
261    {
262        errors.push(ValidationError::with_file_and_offset(
263            ValidationErrorKind::SchemaMismatch {
264                declared: stmt_schema.to_string(),
265                expected: fqn.schema().to_string(),
266            },
267            fqn.path.clone(),
268            main_offset,
269        ));
270    }
271
272    // If the statement includes a database qualifier, validate it matches the path-derived database
273    if let Some(ref stmt_database) = ident.database
274        && stmt_database.as_str() != fqn.database()
275    {
276        errors.push(ValidationError::with_file_and_offset(
277            ValidationErrorKind::DatabaseMismatch {
278                declared: stmt_database.to_string(),
279                expected: fqn.database().to_string(),
280            },
281            fqn.path.clone(),
282            main_offset,
283        ));
284    }
285}