Skip to main content

mz_deploy/
diagnostics.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Source-positioned diagnostics decoupled from any output format.
11//!
12//! [`PositionalDiagnostic`] is the neutral intermediate representation: a
13//! severity, a file path, a source string, and a byte range within that
14//! source. The LSP server wraps it into [`tower_lsp::lsp_types::Diagnostic`]
15//! by converting the byte range to line/column via a [`ropey::Rope`]; the CLI
16//! wraps it into an [`annotate_snippets`] snippet for terminal output.
17//!
18//! Both consumers share the locator helpers in this module
19//! ([`find_identifier`], [`find_identifier_after`], [`locate_plan`],
20//! [`locate_catalog`], [`locate_typecheck`], [`locate_validation`]) which
21//! derive byte ranges from `mz_sql` and validation errors that carry only an
22//! identifier name (no offset). The module also exposes
23//! [`format_typecheck_kind`] and [`format_validation_kind`], the shared
24//! formatters that turn an error kind into a `(message, footers, suggestions)`
25//! triple — `footers` carry class-level advice, `suggestions` carry
26//! mechanical edits encoded as byte-range replacements.
27
28use std::ops::Range;
29use std::path::PathBuf;
30
31use mz_repr::ColumnName;
32use mz_sql::catalog::CatalogError;
33use mz_sql::names::PartialItemName;
34use mz_sql::plan::PlanError;
35
36use crate::project::compiler::typecheck::ObjectTypeCheckErrorKind;
37use crate::project::error::ValidationErrorKind;
38
/// Severity level attached to a [`PositionalDiagnostic`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum Severity {
    /// The diagnostic reports an error.
    Error,
    /// The diagnostic reports a warning.
    Warning,
}
44
45/// A diagnostic anchored to a byte range within a single source file.
46///
47/// Output-format-neutral: the LSP wraps it into `tower_lsp::Diagnostic`,
48/// the CLI wraps it into an `annotate_snippets` snippet.
49#[derive(Debug, Clone)]
50pub(crate) struct PositionalDiagnostic {
51    pub severity: Severity,
52    pub file: PathBuf,
53    /// The source text the byte range refers into. Owned so renderers
54    /// don't need separate filesystem access.
55    pub source: String,
56    /// Half-open byte range within `source`. May be empty (`start == end`)
57    /// for caret-style positions.
58    pub byte_range: Range<usize>,
59    pub message: String,
60    /// Plain help lines shown beneath the snippet.
61    pub footers: Vec<String>,
62    /// Structured replacement suggestions. Each renders as its own help
63    /// section with the patch shown inline (rustc-style "did you mean").
64    pub suggestions: Vec<Suggestion>,
65}
66
67/// One or more interchangeable replacements offered under a single help
68/// title. Renders as a rustc-style multi-suggestion block:
69///
70/// ```text
71/// help: did you mean one of these?
72///   |
73/// 4 -     custoser_name,
74/// 4 +     customer_id,
75///   |
76/// 4 -     custoser_name,
77/// 4 +     customer_name,
78///   |
79/// ```
80#[derive(Debug, Clone)]
81pub(crate) struct Suggestion {
82    pub label: String,
83    pub alternatives: Vec<Replacement>,
84}
85
/// One textual edit: the bytes of the source covered by `byte_range` are
/// to be substituted with `replacement`.
#[derive(Clone, Debug)]
pub(crate) struct Replacement {
    /// Span of the source text to be replaced.
    pub byte_range: Range<usize>,
    /// Text to put in place of `byte_range`.
    pub replacement: String,
}
93
94/// Locate the byte range a typecheck error points at within `source`.
95///
96/// Dispatches to [`locate_plan`] / [`locate_catalog`] for the wrapped
97/// `mz_sql` error, or returns the parser's byte offset directly.
98/// `Internal` variants have no locatable position.
99pub(crate) fn locate_typecheck(
100    kind: &ObjectTypeCheckErrorKind,
101    source: &str,
102) -> Option<Range<usize>> {
103    match kind {
104        ObjectTypeCheckErrorKind::Parser(e) => {
105            let pos = e.error.pos;
106            Some(pos..pos)
107        }
108        ObjectTypeCheckErrorKind::Plan(e) => locate_plan(e, source),
109        ObjectTypeCheckErrorKind::Catalog(e) => locate_catalog(e, source),
110        ObjectTypeCheckErrorKind::Internal(_) => None,
111    }
112}
113
114/// Locate the byte range a [`PlanError`] points at within `source`.
115///
116/// For variants that carry a column or function name, finds the first
117/// whole-word occurrence in `source`. For variants that wrap a
118/// `ParserError`, returns its byte offset directly.
119pub(crate) fn locate_plan(e: &PlanError, source: &str) -> Option<Range<usize>> {
120    use PlanError::*;
121    match e {
122        UnknownColumn { column, .. }
123        | UngroupedColumn { column, .. }
124        | UnknownColumnInUsingClause { column, .. }
125        | AmbiguousColumnInUsingClause { column, .. }
126        | WrongJoinTypeForLateralColumn { column, .. } => find_identifier(source, column.as_str()),
127        AmbiguousColumn(column) => find_identifier(source, column.as_str()),
128        AmbiguousTable(name) => find_identifier(source, name.item.as_str()),
129        UnknownFunction { name, .. }
130        | IndistinctFunction { name, .. }
131        | UnknownOperator { name, .. }
132        | IndistinctOperator { name, .. } => find_identifier(source, last_component(name)),
133        Parser(p) => Some(p.pos..p.pos),
134        ParserStatement(p) => Some(p.error.pos..p.error.pos),
135        Catalog(c) => locate_catalog(c, source),
136        _ => None,
137    }
138}
139
140/// Locate the byte range a [`CatalogError`] points at within `source`.
141pub(crate) fn locate_catalog(e: &CatalogError, source: &str) -> Option<Range<usize>> {
142    use CatalogError::*;
143    match e {
144        UnknownDatabase(name)
145        | UnknownSchema(name)
146        | UnknownRole(name)
147        | UnknownCluster(name)
148        | UnknownClusterReplica(name)
149        | UnknownConnection(name)
150        | UnknownNetworkPolicy(name)
151        | UnknownItem(name) => find_identifier(source, last_component(name)),
152        UnknownFunction { name, .. } | UnknownType { name, .. } => {
153            find_identifier(source, last_component(name))
154        }
155        _ => None,
156    }
157}
158
/// Return the part of a dotted identifier that follows the final `.`.
///
/// `schema.table` becomes `table`; an input without any dot, such as `t`,
/// is returned unchanged.
pub(crate) fn last_component(s: &str) -> &str {
    match s.rfind('.') {
        // `.` is one byte wide, so `dot + 1` is always a char boundary.
        Some(dot) => &s[dot + 1..],
        None => s,
    }
}
164
165/// Find the first whole-word occurrence of `name` in `source`.
166///
167/// "Whole word" means the bytes adjacent to the match are not identifier
168/// characters (`[A-Za-z0-9_]`). Returns the half-open byte range of the
169/// match, or `None` if `name` does not appear as a standalone token.
170pub(crate) fn find_identifier(source: &str, name: &str) -> Option<Range<usize>> {
171    if name.is_empty() {
172        return None;
173    }
174    let bytes = source.as_bytes();
175    let needle = name.as_bytes();
176    if needle.len() > bytes.len() {
177        return None;
178    }
179    for start in 0..=(bytes.len() - needle.len()) {
180        if &bytes[start..start + needle.len()] != needle {
181            continue;
182        }
183        let before_ok = start == 0 || !is_ident_byte(bytes[start - 1]);
184        let end = start + needle.len();
185        let after_ok = end == bytes.len() || !is_ident_byte(bytes[end]);
186        if before_ok && after_ok {
187            return Some(start..end);
188        }
189    }
190    None
191}
192
/// Whether `b` may be part of an unquoted SQL identifier.
///
/// ASCII letters, ASCII digits and `_` qualify. So does every non-ASCII
/// byte (`>= 0x80`): such bytes only occur inside multi-byte UTF-8
/// characters, and PostgreSQL-style lexers accept those characters in
/// identifiers. Treating them as separators would make `caf` look like a
/// standalone word inside `café`.
fn is_ident_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || b == b'_' || !b.is_ascii()
}
196
197/// Same as [`find_identifier`] but starts the search at `start_byte`.
198/// Returns absolute byte ranges into `source`.
199pub(crate) fn find_identifier_after(
200    source: &str,
201    name: &str,
202    start_byte: usize,
203) -> Option<Range<usize>> {
204    let slice = source.get(start_byte..)?;
205    let local = find_identifier(slice, name)?;
206    Some((start_byte + local.start)..(start_byte + local.end))
207}
208
209/// Build the (message, footers, suggestions) triple for one typecheck kind.
210///
211/// Variants that carry alternatives (`UnknownColumn::similar`,
212/// `UnknownFunction::alternative`) are formatted directly so we control
213/// identifier quoting and can emit structured patches. Other variants fall
214/// back to `Display` + the upstream `hint()`.
215pub(crate) fn format_typecheck_kind(
216    kind: &ObjectTypeCheckErrorKind,
217    source: &str,
218    primary_range: &Range<usize>,
219) -> (String, Vec<String>, Vec<Suggestion>) {
220    match kind {
221        ObjectTypeCheckErrorKind::Plan(e) => format_plan(e, source, primary_range),
222        ObjectTypeCheckErrorKind::Catalog(e) => format_catalog(e, source, primary_range),
223        ObjectTypeCheckErrorKind::Parser(e) => (e.to_string(), Vec::new(), Vec::new()),
224        ObjectTypeCheckErrorKind::Internal(msg) => (msg.clone(), Vec::new(), Vec::new()),
225    }
226}
227
228fn format_plan(
229    e: &PlanError,
230    source: &str,
231    primary_range: &Range<usize>,
232) -> (String, Vec<String>, Vec<Suggestion>) {
233    if let PlanError::UnknownColumn {
234        table,
235        column,
236        similar,
237    } = e
238    {
239        let qualified = column_display(table.as_ref(), column);
240        let message = format!("column {qualified} does not exist");
241        if similar.is_empty() {
242            return (message, Vec::new(), Vec::new());
243        }
244        let span = locate_replacement(source, primary_range, column.as_str());
245        let label = match similar.as_ref() {
246            [single] => format!("did you mean `{}`?", column_display(table.as_ref(), single)),
247            _ => "did you mean one of these?".to_string(),
248        };
249        let alternatives = similar
250            .iter()
251            .map(|alt| Replacement {
252                byte_range: span.clone(),
253                replacement: alt.as_str().to_string(),
254            })
255            .collect();
256        return (
257            message,
258            Vec::new(),
259            vec![Suggestion {
260                label,
261                alternatives,
262            }],
263        );
264    }
265    fallback_plan(e)
266}
267
268fn fallback_plan(e: &PlanError) -> (String, Vec<String>, Vec<Suggestion>) {
269    let footers = e.hint().into_iter().collect();
270    (e.to_string(), footers, Vec::new())
271}
272
273fn format_catalog(
274    e: &CatalogError,
275    source: &str,
276    primary_range: &Range<usize>,
277) -> (String, Vec<String>, Vec<Suggestion>) {
278    match e {
279        CatalogError::UnknownFunction {
280            name,
281            alternative: Some(alt),
282        } => {
283            let message = format!("function {name} does not exist");
284            let suggestion = Suggestion {
285                label: format!("did you mean `{alt}`?"),
286                alternatives: vec![Replacement {
287                    byte_range: locate_replacement(source, primary_range, last_component(name)),
288                    replacement: alt.clone(),
289                }],
290            };
291            (message, Vec::new(), vec![suggestion])
292        }
293        other => fallback_catalog(other),
294    }
295}
296
297fn fallback_catalog(e: &CatalogError) -> (String, Vec<String>, Vec<Suggestion>) {
298    let footers = e.hint().into_iter().collect();
299    (e.to_string(), footers, Vec::new())
300}
301
302/// Format `table.column` as a dotted PostgreSQL reference (relation +
303/// column). Each component is rendered as its raw identifier — no outer
304/// quotes — so a reader interprets the dot as a separator rather than as
305/// part of a single quoted identifier.
306fn column_display(table: Option<&PartialItemName>, column: &ColumnName) -> String {
307    match table {
308        Some(t) => format!("{}.{}", t.item, column),
309        None => column.as_str().to_string(),
310    }
311}
312
313/// Find the byte range of `needle` to replace.
314///
315/// Prefer the primary annotation range when its content matches `needle`;
316/// otherwise fall back to a whole-word search of the source so the patch
317/// still lands somewhere reasonable for variants whose locator returned a
318/// less specific span.
319pub(crate) fn locate_replacement(
320    source: &str,
321    primary_range: &Range<usize>,
322    needle: &str,
323) -> Range<usize> {
324    let in_bounds = primary_range.end <= source.len() && primary_range.start <= primary_range.end;
325    if in_bounds && &source[primary_range.clone()] == needle {
326        return primary_range.clone();
327    }
328    find_identifier(source, needle).unwrap_or_else(|| primary_range.clone())
329}
330
331/// Locate the byte range of the declared identifier in a *Mismatch
332/// validation error. Returns `None` for variants that don't carry a
333/// `declared` name we can rewrite.
334pub(crate) fn locate_validation(
335    kind: &ValidationErrorKind,
336    source: &str,
337    statement_offset: Option<usize>,
338) -> Option<Range<usize>> {
339    let (needle, _) = mismatch_pair(kind)?;
340    find_identifier_after(source, needle, statement_offset.unwrap_or(0))
341}
342
343/// Build the (message, footers, suggestions) triple for a validation kind.
344///
345/// For *Mismatch variants the suggestion is a single replacement that
346/// rewrites the declared identifier to the expected one. Other variants
347/// surface only the message and any upstream `help()` text.
348pub(crate) fn format_validation_kind(
349    kind: &ValidationErrorKind,
350    source: &str,
351    primary_range: &Range<usize>,
352) -> (String, Vec<String>, Vec<Suggestion>) {
353    let message = kind.message();
354    let footers: Vec<String> = kind.help().into_iter().collect();
355    let suggestions = mismatch_suggestion(kind, source, primary_range);
356    (message, footers, suggestions)
357}
358
359/// `Some((declared, expected))` if `kind` is a rewritable *Mismatch variant
360/// — the trailing identifier the user wrote, plus the one their file path
361/// requires.
362fn mismatch_pair(kind: &ValidationErrorKind) -> Option<(&str, &str)> {
363    use ValidationErrorKind::*;
364    match kind {
365        ObjectNameMismatch { declared, expected }
366        | SchemaMismatch { declared, expected }
367        | DatabaseMismatch { declared, expected }
368        | ClusterNameMismatch { declared, expected }
369        | RoleNameMismatch { declared, expected }
370        | NetworkPolicyNameMismatch { declared, expected } => {
371            Some((declared.as_str(), expected.as_str()))
372        }
373        _ => None,
374    }
375}
376
377fn mismatch_suggestion(
378    kind: &ValidationErrorKind,
379    source: &str,
380    primary_range: &Range<usize>,
381) -> Vec<Suggestion> {
382    let Some((declared, expected)) = mismatch_pair(kind) else {
383        return Vec::new();
384    };
385    let span = locate_replacement(source, primary_range, declared);
386    vec![Suggestion {
387        label: format!("rename to `{expected}`"),
388        alternatives: vec![Replacement {
389            byte_range: span,
390            replacement: expected.to_string(),
391        }],
392    }]
393}
394
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use mz_repr::ColumnName;

    use super::*;
    use crate::project::error::ValidationErrorKind;

    /// `PlanError::UnknownColumn` for `bogus` with no table and no
    /// similar names.
    fn unknown_bogus_column() -> PlanError {
        PlanError::UnknownColumn {
            table: None,
            column: ColumnName::from("bogus"),
            similar: Box::new([]),
        }
    }

    /// `ObjectNameMismatch` declaring `customers` where `users` is expected.
    fn customers_vs_users() -> ValidationErrorKind {
        ValidationErrorKind::ObjectNameMismatch {
            declared: "customers".to_string(),
            expected: "users".to_string(),
        }
    }

    #[mz_ore::test]
    fn find_identifier_skips_substrings() {
        let sql = "SELECT customer_id, id FROM t";
        let hit = find_identifier(sql, "id").expect("standalone `id` is present");
        assert_eq!(hit.start, 20);
        assert_eq!(&sql[hit], "id");
    }

    #[mz_ore::test]
    fn find_identifier_empty_needle() {
        assert_eq!(find_identifier("anything", ""), None);
    }

    #[mz_ore::test]
    fn find_identifier_absent() {
        assert_eq!(find_identifier("SELECT 1", "missing"), None);
    }

    #[mz_ore::test]
    fn find_identifier_at_start() {
        assert_eq!(find_identifier("foo bar", "foo"), Some(0..3));
    }

    #[mz_ore::test]
    fn find_identifier_at_end() {
        assert_eq!(find_identifier("foo bar", "bar"), Some(4..7));
    }

    #[mz_ore::test]
    fn find_identifier_needle_longer_than_haystack() {
        assert_eq!(find_identifier("ab", "abcd"), None);
    }

    #[mz_ore::test]
    fn last_component_strips_qualifier() {
        let cases = [
            ("foo", "foo"),
            ("schema.table", "table"),
            ("db.schema.table", "table"),
        ];
        for (input, expected) in cases {
            assert_eq!(last_component(input), expected);
        }
    }

    #[mz_ore::test]
    fn locate_plan_unknown_column() {
        let sql = "CREATE VIEW v AS SELECT bogus FROM t";
        let hit = locate_plan(&unknown_bogus_column(), sql).expect("`bogus` is present");
        assert_eq!(hit, 24..29);
        assert_eq!(&sql[hit], "bogus");
    }

    #[mz_ore::test]
    fn locate_plan_unknown_function() {
        let sql = "SELECT bogus_fn(1) FROM t";
        let err = PlanError::UnknownFunction {
            name: "bogus_fn".to_string(),
            arg_types: vec!["int4".to_string()],
        };
        let hit = locate_plan(&err, sql).expect("`bogus_fn` is present");
        assert_eq!(&sql[hit], "bogus_fn");
    }

    #[mz_ore::test]
    fn locate_plan_unhandled_variant_returns_none() {
        let err = PlanError::Unstructured("anything".into());
        assert!(locate_plan(&err, "SELECT 1").is_none());
    }

    #[mz_ore::test]
    fn locate_catalog_unknown_item_strips_qualifier() {
        let sql = "SELECT * FROM bogus_table";
        let err = CatalogError::UnknownItem("schema.bogus_table".to_string());
        let hit = locate_catalog(&err, sql).expect("`bogus_table` is present");
        assert_eq!(&sql[hit], "bogus_table");
    }

    #[mz_ore::test]
    fn locate_typecheck_internal_returns_none() {
        let kind = ObjectTypeCheckErrorKind::Internal("boom".into());
        assert!(locate_typecheck(&kind, "anything").is_none());
    }

    #[mz_ore::test]
    fn locate_typecheck_dispatches_to_plan() {
        let sql = "CREATE VIEW v AS SELECT bogus FROM t";
        let kind = ObjectTypeCheckErrorKind::Plan(Arc::new(unknown_bogus_column()));
        let hit = locate_typecheck(&kind, sql).expect("`bogus` is present");
        assert_eq!(&sql[hit], "bogus");
    }

    #[mz_ore::test]
    fn locate_typecheck_dispatches_to_catalog() {
        let sql = "SELECT * FROM bogus_table";
        let kind =
            ObjectTypeCheckErrorKind::Catalog(CatalogError::UnknownItem("bogus_table".into()));
        let hit = locate_typecheck(&kind, sql).expect("`bogus_table` is present");
        assert_eq!(&sql[hit], "bogus_table");
    }

    #[mz_ore::test]
    fn locate_replacement_prefers_primary_range_when_matches() {
        let span = locate_replacement("SELECT emails FROM t", &(7..13), "emails");
        assert_eq!(span, 7..13);
    }

    #[mz_ore::test]
    fn locate_replacement_falls_back_to_search() {
        let span = locate_replacement("SELECT emails FROM t", &(0..0), "emails");
        assert_eq!(span, 7..13);
    }

    #[mz_ore::test]
    fn find_identifier_after_skips_earlier_occurrence() {
        let sql = "CREATE TABLE foo (...);\nCREATE VIEW v AS SELECT * FROM foo;";
        let hit = find_identifier_after(sql, "foo", 24).expect("second `foo` is present");
        // The hit must be the second `foo`, not the first one.
        assert!(hit.start > 24);
        assert_eq!(&sql[hit], "foo");
    }

    #[mz_ore::test]
    fn locate_validation_object_name_mismatch_finds_declared_token() {
        let sql = "CREATE TABLE customers (id INT);";
        let hit = locate_validation(&customers_vs_users(), sql, Some(0))
            .expect("`customers` is present");
        assert_eq!(&sql[hit], "customers");
    }

    #[mz_ore::test]
    fn format_validation_kind_object_name_mismatch_yields_rename_suggestion() {
        let sql = "CREATE TABLE customers (id INT);";
        let kind = customers_vs_users();
        let primary = locate_validation(&kind, sql, Some(0)).expect("`customers` is present");
        let (message, footers, suggestions) = format_validation_kind(&kind, sql, &primary);

        assert!(message.contains("declared 'customers'"));
        assert!(message.contains("expected 'users'"));
        // The footer carries the class-level rule (the "why").
        assert!(
            footers
                .iter()
                .any(|line| line.contains("must match the .sql file name"))
        );
        // The suggestion carries the mechanical edit (the "what").
        assert_eq!(suggestions.len(), 1);
        let suggestion = &suggestions[0];
        assert!(suggestion.label.contains("users"));
        assert_eq!(suggestion.alternatives.len(), 1);
        let patch = &suggestion.alternatives[0];
        assert_eq!(patch.replacement, "users");
        assert_eq!(&sql[patch.byte_range.clone()], "customers");
    }

    #[mz_ore::test]
    fn format_validation_kind_unhandled_returns_no_suggestions() {
        let kind = ValidationErrorKind::NoMainStatement {
            object_name: "x".to_string(),
        };
        let (_message, _footers, suggestions) = format_validation_kind(&kind, "", &(0..0));
        assert!(suggestions.is_empty());
    }
}