feat: add go to definition to lsp (#4297)

benfdking · web-flow · commit 029acc6c351d · 2025-05-02T18:08:32.000+01:00
diff --git a/Makefile b/Makefile
@@ -109,7 +109,7 @@ guard-%:
 	fi
 
 engine-%-install:
-	pip3 install -e ".[dev,web,slack,${*}]" ./examples/custom_materializations
+	pip3 install -e ".[dev,web,slack,lsp,${*}]" ./examples/custom_materializations
 
 engine-docker-%-up:
 	docker compose -f ./tests/core/engine_adapter/integration/docker/compose.${*}.yaml up -d
diff --git a/sqlmesh/lsp/context.py b/sqlmesh/lsp/context.py
@@ -0,0 +1,19 @@
+from collections import defaultdict
+from pathlib import Path
+from sqlmesh.core.context import Context
+import typing as t
+
+
+class LSPContext:
+    """
+    A context that is used for linting. It contains the context and a reverse map of file uri to model names .
+    """
+
+    def __init__(self, context: Context) -> None:
+        self.context = context
+        map: t.Dict[str, t.List[str]] = defaultdict(list)
+        for model in context.models.values():
+            if model._path is not None:
+                path = Path(model._path).resolve()
+                map[f"file://{path.as_posix()}"].append(model.name)
+        self.map = map
diff --git a/sqlmesh/lsp/main.py b/sqlmesh/lsp/main.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 """A Language Server Protocol (LSP) server for SQL with SQLMesh integration, refactored without globals."""
 
-from collections import defaultdict
 import logging
 import typing as t
 from pathlib import Path
@@ -12,21 +11,8 @@
 from sqlmesh._version import __version__
 from sqlmesh.core.context import Context
 from sqlmesh.core.linter.definition import AnnotatedRuleViolation
-
-
-class LSPContext:
-    """
-    A context that is used for linting. It contains the context and a reverse map of file uri to model names .
-    """
-
-    def __init__(self, context: Context) -> None:
-        self.context = context
-        map: t.Dict[str, t.List[str]] = defaultdict(list)
-        for model in context.models.values():
-            if model._path is not None:
-                path = Path(model._path).resolve()
-                map[f"file://{path.as_posix()}"].append(model.name)
-        self.map = map
+from sqlmesh.lsp.context import LSPContext
+from sqlmesh.lsp.reference import get_model_definitions_for_a_path
 
 
 class SQLMeshLanguageServer:
@@ -144,6 +130,43 @@ def formatting(
                 ls.show_message(f"Error formatting SQL: {e}", types.MessageType.Error)
                 return []
 
+        @self.server.feature(types.TEXT_DOCUMENT_DEFINITION)
+        def goto_definition(
+            ls: LanguageServer, params: types.DefinitionParams
+        ) -> t.List[types.LocationLink]:
+            """Jump to an object's definition."""
+            try:
+                self._ensure_context_for_document(params.text_document.uri)
+                document = ls.workspace.get_document(params.text_document.uri)
+                if self.lsp_context is None:
+                    raise RuntimeError(f"No context found for document: {document.path}")
+
+                references = get_model_definitions_for_a_path(
+                    self.lsp_context, params.text_document.uri
+                )
+                if not references:
+                    return []
+
+                return [
+                    types.LocationLink(
+                        target_uri=reference.uri,
+                        target_selection_range=types.Range(
+                            start=types.Position(line=0, character=0),
+                            end=types.Position(line=0, character=0),
+                        ),
+                        target_range=types.Range(
+                            start=types.Position(line=0, character=0),
+                            end=types.Position(line=0, character=0),
+                        ),
+                        origin_selection_range=reference.range,
+                    )
+                    for reference in references
+                ]
+
+            except Exception as e:
+                ls.show_message(f"Error getting references: {e}", types.MessageType.Error)
+                return []
+
     def _context_get_or_load(self, document_uri: str) -> LSPContext:
         if self.lsp_context is None:
             self._ensure_context_for_document(document_uri)
diff --git a/sqlmesh/lsp/reference.py b/sqlmesh/lsp/reference.py
@@ -0,0 +1,161 @@
+from lsprotocol.types import Range, Position
+import typing as t
+
+from sqlmesh.core.dialect import normalize_model_name
+from sqlmesh.core.model.definition import SqlModel
+from sqlmesh.lsp.context import LSPContext
+from sqlglot import exp
+
+from sqlmesh.utils.pydantic import PydanticModel
+
+
+class Reference(PydanticModel):  # A model reference found inside a document.
+    range: Range  # Where the reference is written in the document being inspected.
+    uri: str  # File URI of the model that the reference points to.
+
+
+def get_model_definitions_for_a_path(
+    lint_context: LSPContext, document_uri: str
+) -> t.List[Reference]:
+    """
+    Get the model references for a given path.
+
+    Works for models and audits.
+    Works for targeting sql and python models.
+
+    Steps:
+    - Get the parsed query
+    - Find all table objects using find_all exp.Table
+        - Match the string against all model names
+    - Need to normalize it before matching
+    - Try get_model before normalization
+    - Match to models that the model refers to
+    """
+    # Ensure the path is a sql model
+    if not document_uri.endswith(".sql"):
+        return []
+
+    # Get the model
+    models = lint_context.map[document_uri]
+    if not models:
+        return []
+    model = lint_context.context.get_model(model_or_snapshot=models[0], raise_if_missing=False)
+    if model is None or not isinstance(model, SqlModel):
+        return []
+
+    # Find all possible references
+    references = []
+    tables = list(model.query.find_all(exp.Table))
+    if len(tables) == 0:
+        return []
+
+    read_file = open(model._path, "r").readlines()
+
+    for table in tables:
+        depends_on = model.depends_on
+
+        # Normalize the table reference
+        reference_name = table.sql(dialect=model.dialect)
+        normalized_reference_name = normalize_model_name(
+            reference_name,
+            default_catalog=lint_context.context.default_catalog,
+            dialect=model.dialect,
+        )
+        if normalized_reference_name not in depends_on:
+            continue
+
+        # Get the referenced model uri
+        referenced_model = lint_context.context.get_model(
+            model_or_snapshot=normalized_reference_name, raise_if_missing=False
+        )
+        if referenced_model is None:
+            continue
+        referenced_model_path = referenced_model._path
+        # Check whether the path exists
+        if not referenced_model_path.is_file():
+            continue
+        referenced_model_uri = f"file://{referenced_model_path}"
+
+        # Extract metadata for positioning
+        table_meta = TokenPositionDetails.from_meta(table.this.meta)
+        table_range = _range_from_token_position_details(table_meta, read_file)
+        start_pos = table_range.start
+        end_pos = table_range.end
+
+        # If there's a catalog or database qualifier, adjust the start position
+        catalog_or_db = table.args.get("catalog") or table.args.get("db")
+        if catalog_or_db is not None:
+            catalog_or_db_meta = TokenPositionDetails.from_meta(catalog_or_db.meta)
+            catalog_or_db_range = _range_from_token_position_details(catalog_or_db_meta, read_file)
+            start_pos = catalog_or_db_range.start
+
+        references.append(
+            Reference(uri=referenced_model_uri, range=Range(start=start_pos, end=end_pos))
+        )
+
+    return references
+
+
+class TokenPositionDetails(PydanticModel):
+    """
+    Details about a token's position in the source code.
+
+    Attributes:
+        line (int): The line that the token ends on.
+        col (int): The column that the token ends on.
+        start (int): The start index of the token.
+        end (int): The ending index of the token.
+    """
+
+    line: int
+    col: int
+    start: int
+    end: int
+
+    @staticmethod
+    def from_meta(meta: t.Dict[str, int]) -> "TokenPositionDetails":
+        return TokenPositionDetails(
+            line=meta["line"],
+            col=meta["col"],
+            start=meta["start"],
+            end=meta["end"],
+        )
+
+
+def _range_from_token_position_details(
+    token_position_details: TokenPositionDetails, read_file: t.List[str]
+) -> Range:
+    """
+    Convert a TokenPositionDetails object to a Range object.
+
+    :param token_position_details: Details about a token's position
+    :param read_file: List of lines from the file
+    :return: A Range object representing the token's position
+    """
+    # Convert from 1-indexed to 0-indexed for line only
+    end_line_0 = token_position_details.line - 1
+    end_col_0 = token_position_details.col
+
+    # Find the start line and column by counting backwards from the end position
+    start_pos = token_position_details.start
+    end_pos = token_position_details.end
+
+    # Initialize with the end position
+    start_line_0 = end_line_0
+    start_col_0 = end_col_0 - (end_pos - start_pos + 1)
+
+    # If start_col_0 is negative, we need to go back to previous lines
+    while start_col_0 < 0 and start_line_0 > 0:
+        start_line_0 -= 1
+        start_col_0 += len(read_file[start_line_0])
+        # Account for newline character
+        if start_col_0 >= 0:
+            break
+        start_col_0 += 1  # For the newline character
+
+    # Ensure we don't have negative values
+    start_col_0 = max(0, start_col_0)
+    return Range(
+        start=Position(line=start_line_0, character=start_col_0),
+        end=Position(line=end_line_0, character=end_col_0),
+    )
diff --git a/tests/lsp/test_context.py b/tests/lsp/test_context.py
@@ -0,0 +1,19 @@
+import pytest
+from sqlmesh.core.context import Context
+from sqlmesh.lsp.context import LSPContext
+
+
+@pytest.mark.fast
+def test_lsp_context():
+    context = Context(paths=["examples/sushi"])
+    lsp_context = LSPContext(context)
+
+    assert lsp_context is not None
+    assert lsp_context.context is not None
+    assert lsp_context.map is not None
+
+    # find one model in the map
+    active_customers_key = next(
+        key for key in lsp_context.map.keys() if key.endswith("models/active_customers.sql")
+    )
+    assert lsp_context.map[active_customers_key] == ["sushi.active_customers"]
diff --git a/tests/lsp/test_reference.py b/tests/lsp/test_reference.py
@@ -0,0 +1,48 @@
+import pytest
+from sqlmesh.core.context import Context
+from sqlmesh.lsp.context import LSPContext
+from sqlmesh.lsp.reference import get_model_definitions_for_a_path
+
+
+@pytest.mark.fast
+def test_reference() -> None:
+    context = Context(paths=["examples/sushi"])
+    lsp_context = LSPContext(context)
+
+    active_customers_uri = next(
+        uri for uri, models in lsp_context.map.items() if "sushi.active_customers" in models
+    )
+    sushi_customers_uri = next(
+        uri for uri, models in lsp_context.map.items() if "sushi.customers" in models
+    )
+
+    references = get_model_definitions_for_a_path(lsp_context, active_customers_uri)
+
+    assert len(references) == 1
+    assert references[0].uri == sushi_customers_uri
+
+    # Check that the reference in the correct range is sushi.customers
+    path = active_customers_uri.removeprefix("file://")
+    read_file = open(path, "r").readlines()
+    # Get the string range in the read file
+    reference_range = references[0].range
+    start_line = reference_range.start.line
+    end_line = reference_range.end.line
+    start_character = reference_range.start.character
+    end_character = reference_range.end.character
+    # Get the string from the file
+
+    # If the reference spans multiple lines, handle it accordingly
+    if start_line == end_line:
+        # Reference is on a single line
+        line_content = read_file[start_line]
+        referenced_text = line_content[start_character:end_character]
+    else:
+        # Reference spans multiple lines
+        referenced_text = read_file[start_line][
+            start_character:
+        ]  # First line from start_character to end
+        for line_num in range(start_line + 1, end_line):  # Middle lines (if any)
+            referenced_text += read_file[line_num]
+        referenced_text += read_file[end_line][:end_character]  # Last line up to end_character
+    assert referenced_text == "sushi.customers"
diff --git a/vscode/extension/src/lsp/lsp.ts b/vscode/extension/src/lsp/lsp.ts
@@ -27,7 +27,9 @@ export class LSPClient implements Disposable {
 
     const sqlmesh = await sqlmesh_lsp_exec()
     if (isErr(sqlmesh)) {
-      traceError(`Failed to get sqlmesh_lsp_exec, ${sqlmesh.error.type}`)
+      traceError(
+        `Failed to get sqlmesh_lsp_exec, ${JSON.stringify(sqlmesh.error)}`,
+      )
       return sqlmesh
     }
     const workspaceFolders = getWorkspaceFolders()