mz_deploy/project/compiler/object_validation/identifiers.rs
1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Identifier validation for database objects.
11//!
12//! Validates that identifiers (database, schema, object, and cluster names)
13//! follow Materialize's naming rules: lowercase letters, digits, underscores,
14//! and dollar signs, starting with a letter or underscore.
15
16use crate::project::ast::Statement;
17use crate::project::error::{ValidationError, ValidationErrorKind};
18use crate::project::ir::compiled::FullyQualifiedName;
19use std::path::PathBuf;
20
/// Category of the name under validation.
///
/// Only used to prefix error messages (via its `Display` impl) so the user can tell
/// which part of a qualified name was rejected.
#[derive(Clone, Copy, Debug)]
enum IdentifierKind {
    /// A database name.
    Database,
    /// A schema name.
    Schema,
    /// An object name.
    Object,
    /// A cluster name.
    Cluster,
}
29
30impl std::fmt::Display for IdentifierKind {
31 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 match self {
33 Self::Database => write!(f, "database"),
34 Self::Schema => write!(f, "schema"),
35 Self::Object => write!(f, "object"),
36 Self::Cluster => write!(f, "cluster"),
37 }
38 }
39}
40
41/// Validates an identifier follows naming rules.
42///
43/// # Rules
44///
45/// - **Start Character**: Must begin with a lowercase letter (a-z, including letters with
46/// diacritical marks and non-Latin letters) or an underscore (_).
47/// - **Subsequent Characters**: Can include lowercase letters, digits (0-9), underscores (_),
48/// or dollar signs ($).
49/// - **Case**: All characters must be lowercase.
50///
51/// # Arguments
52///
53/// * `name` - The identifier name to validate
54/// * `kind` - The type of identifier (for error messages)
55///
56/// # Returns
57///
58/// * `Ok(())` if the identifier is valid
59/// * `Err(String)` with a descriptive error message if invalid
60///
61/// # Examples
62///
63/// ```text
64/// Valid identifiers:
65/// users, _temp, my_table, café, 日本語, user123, price$
66///
67/// Invalid identifiers:
68/// Users (uppercase)
69/// 123table (starts with digit)
70/// my-table (contains hyphen)
71/// MY_TABLE (uppercase)
72/// ```
73fn validate_identifier_format(name: &str, kind: IdentifierKind) -> Result<(), String> {
74 if name.is_empty() {
75 return Err(format!("{} name cannot be empty", kind));
76 }
77
78 let mut chars = name.chars().peekable();
79
80 // Check first character
81 if let Some(first) = chars.next() {
82 if first.is_uppercase() {
83 return Err(format!(
84 "{} name '{}' contains uppercase character '{}' at position 1. \
85 Identifiers must be lowercase.",
86 kind, name, first
87 ));
88 }
89
90 if first.is_ascii_digit() {
91 return Err(format!(
92 "{} name '{}' starts with digit '{}'. \
93 Identifiers must start with a letter or underscore.",
94 kind, name, first
95 ));
96 }
97
98 // First char must be a letter (including unicode letters) or underscore
99 if !first.is_alphabetic() && first != '_' {
100 return Err(format!(
101 "{} name '{}' starts with invalid character '{}'. \
102 Identifiers must start with a letter or underscore.",
103 kind, name, first
104 ));
105 }
106 }
107
108 // Check subsequent characters
109 for (pos, ch) in chars.enumerate() {
110 let position = pos + 2; // +2 because we already consumed first char and positions are 1-indexed
111
112 if ch.is_uppercase() {
113 return Err(format!(
114 "{} name '{}' contains uppercase character '{}' at position {}. \
115 Identifiers must be lowercase.",
116 kind, name, ch, position
117 ));
118 }
119
120 // Valid subsequent chars: letters (lowercase), digits, underscore, dollar sign
121 let is_valid = ch.is_alphabetic() || ch.is_ascii_digit() || ch == '_' || ch == '$';
122
123 if !is_valid {
124 return Err(format!(
125 "{} name '{}' contains invalid character '{}' at position {}. \
126 Identifiers can only contain letters, digits, underscores, and dollar signs.",
127 kind, name, ch, position
128 ));
129 }
130 }
131
132 Ok(())
133}
134
135/// Validates all identifiers in a FullyQualifiedName (database, schema, object).
136///
137/// # Arguments
138///
139/// * `fqn` - The fully qualified name to validate
140/// * `main_offset` - Byte offset of the CREATE statement being validated
141/// * `errors` - Vector to collect validation errors
142pub(super) fn validate_fqn_identifiers(
143 fqn: &FullyQualifiedName,
144 main_offset: usize,
145 errors: &mut Vec<ValidationError>,
146) {
147 // Validate database name
148 if let Err(reason) = validate_identifier_format(fqn.database(), IdentifierKind::Database) {
149 errors.push(ValidationError::with_file_and_offset(
150 ValidationErrorKind::InvalidIdentifier {
151 name: fqn.database().to_string(),
152 reason,
153 },
154 fqn.path.clone(),
155 main_offset,
156 ));
157 }
158
159 // Validate schema name
160 if let Err(reason) = validate_identifier_format(fqn.schema(), IdentifierKind::Schema) {
161 errors.push(ValidationError::with_file_and_offset(
162 ValidationErrorKind::InvalidIdentifier {
163 name: fqn.schema().to_string(),
164 reason,
165 },
166 fqn.path.clone(),
167 main_offset,
168 ));
169 }
170
171 // Validate object name
172 if let Err(reason) = validate_identifier_format(fqn.object(), IdentifierKind::Object) {
173 errors.push(ValidationError::with_file_and_offset(
174 ValidationErrorKind::InvalidIdentifier {
175 name: fqn.object().to_string(),
176 reason,
177 },
178 fqn.path.clone(),
179 main_offset,
180 ));
181 }
182}
183
184/// Validates a cluster name follows naming rules.
185///
186/// # Arguments
187///
188/// * `cluster_name` - The cluster name to validate
189/// * `path` - The file path (for error reporting)
190///
191/// # Returns
192///
193/// * `Ok(())` if valid
194/// * `Err(ValidationError)` if invalid
195pub(super) fn validate_cluster_name(
196 cluster_name: &str,
197 path: &PathBuf,
198 byte_offset: usize,
199) -> Result<(), ValidationError> {
200 validate_identifier_format(cluster_name, IdentifierKind::Cluster).map_err(|reason| {
201 ValidationError::with_file_and_offset(
202 ValidationErrorKind::InvalidIdentifier {
203 name: cluster_name.to_string(),
204 reason,
205 },
206 path.clone(),
207 byte_offset,
208 )
209 })
210}
211
212/// Validates that the statement's identifier matches the expected file path structure.
213///
214/// Ensures that the object name in the CREATE statement matches the file name, and
215/// that any schema/database qualifiers match the directory structure.
216///
217/// # Validation Rules
218///
219/// - The object name must match the file name (without `.sql` extension)
220/// - If the statement includes a schema qualifier, it must match the parent directory name
221/// - If the statement includes a database qualifier, it must match the grandparent directory name
222///
223/// # Examples
224///
225/// Valid mappings:
226/// ```text
227/// materialize/public/users.sql -> CREATE TABLE users (...)
228/// materialize/public/users.sql -> CREATE TABLE public.users (...)
229/// materialize/public/users.sql -> CREATE TABLE materialize.public.users (...)
230/// ```
231///
232/// Invalid mappings:
233/// ```text
234/// materialize/public/users.sql -> CREATE TABLE customers (...) X name mismatch
235/// materialize/public/users.sql -> CREATE TABLE private.users (...) X schema mismatch
236/// materialize/public/users.sql -> CREATE TABLE other.public.users (...) X database mismatch
237/// ```
238pub(super) fn validate_ident(
239 stmt: &Statement,
240 fqn: &FullyQualifiedName,
241 main_offset: usize,
242 errors: &mut Vec<ValidationError>,
243) {
244 let ident = stmt.ident();
245
246 // The object name in the statement must match the file name
247 if ident.object.as_str() != fqn.object() {
248 errors.push(ValidationError::with_file_and_offset(
249 ValidationErrorKind::ObjectNameMismatch {
250 declared: ident.object.to_string(),
251 expected: fqn.object().to_string(),
252 },
253 fqn.path.clone(),
254 main_offset,
255 ));
256 }
257
258 // If the statement includes a schema qualifier, validate it matches the path-derived schema
259 if let Some(ref stmt_schema) = ident.schema
260 && stmt_schema.as_str() != fqn.schema()
261 {
262 errors.push(ValidationError::with_file_and_offset(
263 ValidationErrorKind::SchemaMismatch {
264 declared: stmt_schema.to_string(),
265 expected: fqn.schema().to_string(),
266 },
267 fqn.path.clone(),
268 main_offset,
269 ));
270 }
271
272 // If the statement includes a database qualifier, validate it matches the path-derived database
273 if let Some(ref stmt_database) = ident.database
274 && stmt_database.as_str() != fqn.database()
275 {
276 errors.push(ValidationError::with_file_and_offset(
277 ValidationErrorKind::DatabaseMismatch {
278 declared: stmt_database.to_string(),
279 expected: fqn.database().to_string(),
280 },
281 fqn.path.clone(),
282 main_offset,
283 ));
284 }
285}