Skip to main content

mz_deploy/lsp/
goto_definition.rs

1// Copyright Materialize, Inc. and contributors. All rights reserved.
2//
3// Use of this software is governed by the Business Source License
4// included in the LICENSE file.
5//
6// As of the Change Date specified in that file, in accordance with
7// the Business Source License, use of this software will be governed
8// by the Apache License, Version 2.0.
9
10//! Two-phase go-to-definition for SQL identifiers.
11//!
12//! Go-to-definition resolves a cursor position in a `.sql` file to the source
13//! file that defines the referenced object. The algorithm has two phases:
14//!
15//! ## Phase A — Find identifier at cursor
16//!
17//! [`find_reference_at_position()`] uses the lexer ([`mz_sql_lexer::lexer::lex()`])
18//! to tokenize the text, finds the token containing the cursor byte offset, and
19//! collects dot-separated identifier chains. For example, in `FROM myschema.orders`,
20//! clicking on either `myschema` or `orders` returns `["myschema", "orders"]`.
21//!
22//! Only [`Token::Ident`] tokens are considered identifiers. [`Token::Keyword`]
23//! tokens (e.g., `SELECT`, `FROM`) are not resolved. Tokens inside string
24//! literals are also ignored.
25//!
26//! ## Phase B — Resolve to file location
27//!
28//! [`resolve_reference()`] takes the identifier parts and the `ProjectCache`
29//! (SQLite), constructs an `ObjectId` using the same 1/2/3-part resolution as
30//! `ObjectId::from_item_name()`, looks up the object in the cache, and
31//! returns the file path from `CachedObject::file_path`.
32//!
33//! ## Examples
34//!
35//! ```text
36//! -- File: models/mydb/public/bar.sql
37//! CREATE VIEW bar AS SELECT * FROM foo;
38//!                                  ^^^
39//!                            cursor here
40//!
41//! Phase A: ["foo"]
42//! Phase B: default_db="mydb", default_schema="public"
43//!        → ObjectId("mydb.public.foo")
44//!        → models/mydb/public/foo.sql
45//! ```
46
47use crate::project::compiler::cache::ProjectCache;
48use crate::project::ir::object_id::ObjectId;
49use mz_sql_lexer::lexer::{self, Token};
50use std::path::Path;
51use tower_lsp::lsp_types::{Location, Range, Url};
52
53/// Find the dot-qualified identifier chain at the given byte offset.
54///
55/// Returns `None` if the cursor is not on an identifier token (e.g., on a
56/// keyword, operator, string literal, dot, or whitespace).
57///
58/// # Arguments
59/// * `text` — SQL source text.
60/// * `byte_offset` — Cursor position as a byte offset into `text`.
61///
62/// # Returns
63/// A vec of identifier parts (e.g., `["schema", "object"]`) or `None`.
64pub fn find_reference_at_position(text: &str, byte_offset: usize) -> Option<Vec<String>> {
65    let tokens = lexer::lex(text).ok()?;
66    if tokens.is_empty() {
67        return None;
68    }
69
70    // Find the index of the token containing byte_offset.
71    let token_idx = find_token_at_offset(&tokens, byte_offset, text.len())?;
72
73    // The token under the cursor must be an identifier or a keyword usable as identifier.
74    let token = &tokens[token_idx];
75    let ident_text = extract_ident_text(token)?;
76
77    // Collect the full dot-qualified chain by scanning backward and forward.
78    let mut parts = vec![ident_text];
79
80    // Scan backward: expect alternating Dot, Ident/Keyword
81    let mut i = token_idx;
82    while i >= 2 {
83        if matches!(tokens[i - 1].kind, Token::Dot) {
84            if let Some(s) = extract_ident_text(&tokens[i - 2]) {
85                parts.insert(0, s);
86                i -= 2;
87            } else {
88                break;
89            }
90        } else {
91            break;
92        }
93    }
94
95    // Scan forward: expect alternating Dot, Ident/Keyword
96    let mut j = token_idx;
97    while j + 2 < tokens.len() {
98        if matches!(tokens[j + 1].kind, Token::Dot) {
99            if let Some(s) = extract_ident_text(&tokens[j + 2]) {
100                parts.push(s);
101                j += 2;
102            } else {
103                break;
104            }
105        } else {
106            break;
107        }
108    }
109
110    // Only return chains that are part of a dot-qualified name, or a single
111    // ident (not a bare keyword like SELECT/FROM).
112    if parts.len() == 1 && matches!(token.kind, Token::Keyword(_)) {
113        return None;
114    }
115
116    Some(parts)
117}
118
119/// Extract identifier text from a token.
120///
121/// Returns the identifier string for `Token::Ident`, and the keyword string
122/// for `Token::Keyword` (since SQL keywords can be used as identifiers in
123/// dot-qualified names like `mydb.schema.t`). Returns `None` for all other
124/// token types.
125fn extract_ident_text(token: &lexer::PosToken) -> Option<String> {
126    match &token.kind {
127        Token::Ident(s) => Some(s.to_string()),
128        Token::Keyword(kw) => Some(kw.as_str().to_lowercase()),
129        _ => None,
130    }
131}
132
133/// Find the token index containing the given byte offset.
134fn find_token_at_offset(
135    tokens: &[lexer::PosToken],
136    byte_offset: usize,
137    text_len: usize,
138) -> Option<usize> {
139    for (i, token) in tokens.iter().enumerate() {
140        let start = token.offset;
141        let end = if i + 1 < tokens.len() {
142            tokens[i + 1].offset
143        } else {
144            text_len
145        };
146        if byte_offset >= start && byte_offset < end {
147            return Some(i);
148        }
149    }
150    None
151}
152
153/// Resolve identifier parts to an `ObjectId` using the file's path context.
154///
155/// Derives the default database/schema from the file's path relative to the
156/// project root (expects `models/<database>/<schema>/` structure), then
157/// constructs an `ObjectId` using 1/2/3-part name resolution.
158pub fn resolve_object_id(parts: &[String], file_uri: &Url, root: &Path) -> Option<ObjectId> {
159    let (default_db, default_schema) = ObjectId::default_db_schema_from_uri(file_uri, root)?;
160
161    match parts.len() {
162        1 => Some(ObjectId::new(default_db, default_schema, parts[0].clone())),
163        2 => Some(ObjectId::new(
164            default_db,
165            parts[0].clone(),
166            parts[1].clone(),
167        )),
168        3 => Some(ObjectId::new(
169            parts[0].clone(),
170            parts[1].clone(),
171            parts[2].clone(),
172        )),
173        _ => None,
174    }
175}
176
177/// Resolve identifier parts to a file location using the `ProjectCache`.
178///
179/// Derives the default database/schema from the file's path relative to the
180/// project root (expects `models/<database>/<schema>/` structure), then
181/// constructs an `ObjectId` and looks it up in the project cache.
182///
183/// # Returns
184/// A [`Location`] pointing to the start of the defining file, or `None` if the
185/// reference cannot be resolved (unknown object, external dependency, etc.).
186pub fn resolve_reference(
187    parts: &[String],
188    file_uri: &Url,
189    root: &Path,
190    project_cache: &ProjectCache,
191) -> Option<Location> {
192    let id = resolve_object_id(parts, file_uri, root)?;
193    let cached_obj = project_cache.get_object(&id)?;
194    let file_path = root.join(&cached_obj.file_path);
195    let uri = Url::from_file_path(&file_path).ok()?;
196
197    Some(Location {
198        uri,
199        range: Range::default(),
200    })
201}
202
203#[cfg(test)]
204mod tests {
205    use super::*;
206
207    #[mz_ore::test]
208    fn unqualified_identifier() {
209        let text = "SELECT * FROM foo";
210        // "foo" starts at byte 14
211        let parts = find_reference_at_position(text, 14).unwrap();
212        assert_eq!(parts, vec!["foo"]);
213    }
214
215    #[mz_ore::test]
216    fn schema_qualified_identifier() {
217        let text = "SELECT * FROM myschema.orders";
218        // cursor on "orders" (byte 23)
219        let parts = find_reference_at_position(text, 23).unwrap();
220        assert_eq!(parts, vec!["myschema", "orders"]);
221    }
222
223    #[mz_ore::test]
224    fn schema_qualified_cursor_on_schema() {
225        let text = "SELECT * FROM myschema.orders";
226        // cursor on "myschema" (byte 14)
227        let parts = find_reference_at_position(text, 14).unwrap();
228        assert_eq!(parts, vec!["myschema", "orders"]);
229    }
230
231    #[mz_ore::test]
232    fn fully_qualified_identifier() {
233        let text = "SELECT * FROM db.schema.t";
234        // cursor on "t" (byte 24)
235        let parts = find_reference_at_position(text, 24).unwrap();
236        assert_eq!(parts, vec!["db", "schema", "t"]);
237    }
238
239    #[mz_ore::test]
240    fn cursor_on_dot_returns_none() {
241        let text = "SELECT * FROM myschema.orders";
242        // The dot is at byte 22
243        let result = find_reference_at_position(text, 22);
244        assert!(result.is_none());
245    }
246
247    #[mz_ore::test]
248    fn cursor_on_string_literal_returns_none() {
249        let text = "SELECT 'hello' FROM foo";
250        // cursor inside string literal (byte 8)
251        let result = find_reference_at_position(text, 8);
252        assert!(result.is_none());
253    }
254
255    #[mz_ore::test]
256    fn cursor_on_keyword_returns_none() {
257        let text = "SELECT * FROM foo";
258        // cursor on "SELECT" (byte 0)
259        let result = find_reference_at_position(text, 0);
260        assert!(result.is_none());
261    }
262
263    #[mz_ore::test]
264    fn cursor_at_end_of_file_returns_none() {
265        let text = "SELECT 1";
266        let result = find_reference_at_position(text, text.len());
267        assert!(result.is_none());
268    }
269
270    #[mz_ore::test]
271    fn empty_file_returns_none() {
272        let result = find_reference_at_position("", 0);
273        assert!(result.is_none());
274    }
275
276    #[mz_ore::test]
277    fn quoted_identifier() {
278        let text = r#"SELECT * FROM "My Table""#;
279        // cursor inside quoted ident
280        let parts = find_reference_at_position(text, 15).unwrap();
281        assert_eq!(parts, vec!["My Table"]);
282    }
283
284    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
285    #[mz_ore::test]
286    fn resolve_one_part_name() {
287        let (root, cache) = build_test_project_cache();
288        let file_uri = Url::from_file_path(root.path().join("models/mydb/public/bar.sql")).unwrap();
289
290        let location =
291            resolve_reference(&["foo".to_string()], &file_uri, root.path(), &cache).unwrap();
292        let expected_path = root.path().join("models/mydb/public/foo.sql");
293        assert_eq!(location.uri, Url::from_file_path(expected_path).unwrap());
294    }
295
296    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
297    #[mz_ore::test]
298    fn resolve_two_part_name() {
299        let (root, cache) = build_test_project_cache();
300        let file_uri = Url::from_file_path(root.path().join("models/mydb/public/bar.sql")).unwrap();
301
302        let location = resolve_reference(
303            &["public".to_string(), "foo".to_string()],
304            &file_uri,
305            root.path(),
306            &cache,
307        )
308        .unwrap();
309        let expected_path = root.path().join("models/mydb/public/foo.sql");
310        assert_eq!(location.uri, Url::from_file_path(expected_path).unwrap());
311    }
312
313    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
314    #[mz_ore::test]
315    fn resolve_three_part_name() {
316        let (root, cache) = build_test_project_cache();
317        let file_uri = Url::from_file_path(root.path().join("models/mydb/public/bar.sql")).unwrap();
318
319        let location = resolve_reference(
320            &["mydb".to_string(), "public".to_string(), "foo".to_string()],
321            &file_uri,
322            root.path(),
323            &cache,
324        )
325        .unwrap();
326        let expected_path = root.path().join("models/mydb/public/foo.sql");
327        assert_eq!(location.uri, Url::from_file_path(expected_path).unwrap());
328    }
329
330    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
331    #[mz_ore::test]
332    fn resolve_unknown_name_returns_none() {
333        let (root, cache) = build_test_project_cache();
334        let file_uri = Url::from_file_path(root.path().join("models/mydb/public/bar.sql")).unwrap();
335
336        let result =
337            resolve_reference(&["nonexistent".to_string()], &file_uri, root.path(), &cache);
338        assert!(result.is_none());
339    }
340
341    #[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function `rust_psm_stack_pointer` on OS `linux`
342    #[mz_ore::test]
343    fn resolve_cross_schema_reference() {
344        let (root, cache) = build_test_project_cross_schema_cache();
345        let file_uri = Url::from_file_path(root.path().join("models/mydb/other/baz.sql")).unwrap();
346
347        let location = resolve_reference(
348            &["public".to_string(), "foo".to_string()],
349            &file_uri,
350            root.path(),
351            &cache,
352        )
353        .unwrap();
354        let expected_path = root.path().join("models/mydb/public/foo.sql");
355        assert_eq!(location.uri, Url::from_file_path(expected_path).unwrap());
356    }
357
358    fn build_test_project_cache() -> (tempfile::TempDir, ProjectCache) {
359        let root = tempfile::tempdir().unwrap();
360        let models = root.path().join("models/mydb/public");
361        std::fs::create_dir_all(&models).unwrap();
362        std::fs::write(models.join("foo.sql"), "CREATE VIEW foo AS SELECT 1 AS id;").unwrap();
363        std::fs::write(
364            models.join("bar.sql"),
365            "CREATE VIEW bar AS SELECT * FROM foo;",
366        )
367        .unwrap();
368        write_project_toml(root.path());
369
370        let _project = crate::project::plan_sync(
371            &crate::fs::FileSystem::new(),
372            root.path(),
373            None,
374            None,
375            &Default::default(),
376        )
377        .expect("project should compile");
378        let cache = ProjectCache::open(root.path(), "", None, &Default::default())
379            .expect("cache should open")
380            .expect("cache DB should exist");
381        (root, cache)
382    }
383
384    fn build_test_project_cross_schema_cache() -> (tempfile::TempDir, ProjectCache) {
385        let root = tempfile::tempdir().unwrap();
386
387        let storage = root.path().join("models/mydb/public");
388        std::fs::create_dir_all(&storage).unwrap();
389        std::fs::write(storage.join("foo.sql"), "CREATE TABLE foo (id INT);").unwrap();
390
391        let other = root.path().join("models/mydb/other");
392        std::fs::create_dir_all(&other).unwrap();
393        std::fs::write(
394            other.join("baz.sql"),
395            "CREATE VIEW baz AS SELECT * FROM mydb.public.foo;",
396        )
397        .unwrap();
398        write_project_toml(root.path());
399
400        let _project = crate::project::plan_sync(
401            &crate::fs::FileSystem::new(),
402            root.path(),
403            None,
404            None,
405            &Default::default(),
406        )
407        .expect("project should compile");
408        let cache = ProjectCache::open(root.path(), "", None, &Default::default())
409            .expect("cache should open")
410            .expect("cache DB should exist");
411        (root, cache)
412    }
413
414    fn write_project_toml(root: &Path) {
415        std::fs::write(root.join("project.toml"), "[project]\nname = \"test\"\n").unwrap();
416    }
417}