WIP Add basic dbt objects and rule definitions

PicnicSupermarket · Mar 8, 2024 · c243c5b · c243c5b
1 parent cba523e
commit c243c5b
Show file tree

Hide file tree

Showing 5 changed files with 285 additions and 0 deletions.
diff --git a/src/dbt_score/manifest.py b/src/dbt_score/manifest.py
@@ -0,0 +1,125 @@
+from dataclasses import dataclass, field
+from typing import Any, List
+
+
+@dataclass
+class Constraint:
+    """Constraint for a column in a model.
+
+    Instances are created by `Model.from_node` from the entries of a column's
+    `constraints` list in the manifest. The values are read with `dict.get`,
+    so an attribute is `None` when the manifest entry omits the matching key.
+    """
+
+    # Kind of constraint; the example rules look for the value "primary_key".
+    type: str
+    # Value of the constraint's "expression" key in the manifest.
+    expression: str
+    # Value of the constraint's "name" key in the manifest.
+    name: str
+
+
+@dataclass
+class Test:
+    """Test for a column or model.
+
+    Instances are created by `ManifestLoader.load_tests` from manifest nodes
+    whose resource type is "test".
+    """
+
+    # Value of the test node's "name" key.
+    name: str
+    # Name found in the node's "test_metadata" (the example rules look for
+    # the value "unique").
+    type: str
+    # Tags of the test node; defaults to a fresh empty list per instance.
+    tags: list[str] = field(default_factory=list)
+
+
+@dataclass
+class Column:
+    """Represents a column in a model.
+
+    Instances are created by `Model.from_node`; column-level tests are
+    appended afterwards by `ManifestLoader.load_tests`.
+    """
+
+    # Value of the column's "name" key in the manifest.
+    name: str
+    # Value of the column's "description" key; read with `dict.get`, so it is
+    # `None` when the key is absent.
+    description: str
+    # Constraints declared on the column, in manifest order.
+    constraints: List[Constraint]
+    # Tests attached to this column; defaults to a fresh empty list.
+    tests: List[Test] = field(default_factory=list)
+
+
+@dataclass
+class Model:
+    """Represents a dbt model."""
+
+    id: str
+    name: str
+    description: str
+    file_path: str
+    config: dict[str, Any]
+    meta: dict[str, Any]
+    columns: dict[str, Column]
+    tests: list[Test] = field(default_factory=list)
+
+    @classmethod
+    def from_node(cls, node_values: dict[str, Any]) -> "Model":
+        """Create a model object from a node in the manifest."""
+        columns = {
+            name: Column(
+                name=values.get("name"),
+                description=values.get("description"),
+                constraints=[
+                    Constraint(
+                        name=constraint.get("name"),
+                        type=constraint.get("type"),
+                        expression=constraint.get("expression"),
+                    )
+                    for constraint in values.get("constraints", [])
+                ],
+            )
+            for name, values in node_values.get("columns", {}).items()
+        }
+
+        model = cls(
+            id=node_values["unique_id"],
+            file_path=node_values["patch_path"],
+            config=node_values.get("config", {}),
+            name=node_values["name"],
+            description=node_values.get("description", ""),
+            meta=node_values.get("meta", {}),
+            columns=columns,
+        )
+
+        return model
+
+
+class ManifestLoader:
+    """Load the models and tests from the manifest."""
+
+    def __init__(self, raw_manifest: dict[str, Any]):
+        self.raw_manifest = raw_manifest
+        self.raw_nodes = raw_manifest.get("nodes", {})
+        self.models: dict[str, Model] = {}
+        self.tests: dict[str, Test] = {}
+
+        # Load models first so the tests can be attached to them later.
+        self.load_models()
+        self.load_tests()
+
+    def load_models(self) -> None:
+        """Load the models from the manifest."""
+        for node_values in self.raw_nodes.values():
+            if node_values.get("resource_type") == "model":
+                model = Model.from_node(node_values)
+                self.models[model.id] = model
+
+    def load_tests(self) -> None:
+        """Load the tests from the manifest and attach them to the right object."""
+        for node_values in self.raw_nodes.values():
+            # Only include tests that are attached to a model.
+            if node_values.get("resource_type") == "test" and node_values.get(
+                "attached_node"
+            ):
+                model = self.models.get(node_values.get("attached_node"))
+
+                if not model:
+                    raise ValueError(
+                        f"Model {node_values.get('attached_node')}"
+                        f"not found, while tests are attached to it."
+                    )
+
+                test = Test(
+                    name=node_values.get("name"),
+                    type=node_values.get("test_metadata").get("name"),
+                    tags=node_values.get("tags"),
+                )
+                column_name = (
+                    node_values.get("test_metadata").get("kwargs").get("column_name")
+                )
+
+                if column_name:  # Test is a column-level test.
+                    model.columns[column_name].tests.append(test)
+                else:
+                    model.tests.append(test)
diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py
@@ -0,0 +1,47 @@
+import functools
+import logging
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Callable
+
+from dbt_score.manifest import Model
+
+# Set up the root logger with the default handler and format; this happens as
+# a side effect of importing this module.
+logging.basicConfig()
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Only records of level INFO and above pass this logger.
+# NOTE(review): `wrapper_rule` logs at DEBUG level, so that message is not
+# emitted with this setting - confirm this is intended.
+logger.setLevel(logging.INFO)
+
+
+class Severity(Enum):
+    """The severity/weight of a rule.
+
+    The numeric values increase with the severity. The `rule` decorator uses
+    `MEDIUM` as its default.
+    """
+
+    LOW = 1
+    MEDIUM = 2
+    HIGH = 3
+    CRITICAL = 4
+
+
+@dataclass
+class RuleViolation:
+    """The violation of a rule.
+
+    A rule returns an instance of this class when a model violates it, and
+    `None` otherwise.
+    """
+
+    # Optional details about the violation; `None` when the rule gives none.
+    message: str | None = None
+
+
+def rule(
+    description: str,
+    hint: str,
+    severity: Severity = Severity.MEDIUM,
+) -> Callable[[Callable[[Model], RuleViolation | None]], Callable[..., None]]:
+    """Rule decorator."""
+
+    def decorator_rule(
+        func: Callable[[Model], RuleViolation | None],
+    ) -> Callable[..., None]:
+        @functools.wraps(func)
+        def wrapper_rule(*args: Any, **kwargs: Any) -> Any:
+            logger.debug("Executing `%s` with severity: %s.", func.__name__, severity)
+            return func(*args, **kwargs)
+
+        return wrapper_rule
+
+    return decorator_rule
diff --git a/src/dbt_score/rules/__init__.py b/src/dbt_score/rules/__init__.py
diff --git a/src/dbt_score/rules/example_rules.py b/src/dbt_score/rules/example_rules.py
@@ -0,0 +1,93 @@
+"""All general rules."""
+
+from ..manifest import Model
+from ..rule import RuleViolation, Severity, rule
+
+
+@rule(
+    description="A model should have an owner defined.",
+    hint="Define the owner of the model in the meta section.",
+    severity=Severity.HIGH,
+)
+def has_owner(model: Model) -> RuleViolation | None:
+    """A model should have an owner defined."""
+    if "owner" not in model.meta:
+        return RuleViolation()
+
+    return None
+
+
+@rule(description="A model should have a primary key defined.", hint="Some hint.")
+def has_primary_key(model: Model) -> RuleViolation | None:
+    """A model should have a primary key defined, unless it's a view."""
+    if not model.config.get("materialized") == "picnic_view":
+        has_pk = False
+        for column in model.columns.values():
+            if "primary_key" in [constraint.type for constraint in column.constraints]:
+                has_pk = True
+                break
+
+        if not has_pk:
+            return RuleViolation()
+
+    return None
+
+
+@rule(
+    description="Primary key columns should have a uniqueness test defined.",
+    hint="Some hint.",
+)
+def primary_key_has_uniqueness_test(model: Model) -> RuleViolation | None:
+    """Primary key columns should have a uniqueness test defined."""
+    columns_with_pk = []
+    if not model.config.get("materialized") == "picnic_view":
+        for column_name, column in model.columns.items():
+            if "primary_key" in [constraint.type for constraint in column.constraints]:
+                columns_with_pk.append(column_name)
+
+        tests = (
+            model.columns[columns_with_pk[0]].tests
+            if len(columns_with_pk) == 1
+            else model.tests
+        )
+
+        if columns_with_pk and "unique" not in [test.type for test in tests]:
+            return RuleViolation()
+
+    return None
+
+
+@rule(
+    description="All columns of a model should have a description.", hint="Some hint."
+)
+def columns_have_description(model: Model) -> RuleViolation | None:
+    """All columns of a model should have a description."""
+    invalid_columns = [
+        column_name
+        for column_name, column in model.columns.items()
+        if not column.description
+    ]
+    if invalid_columns:
+        return RuleViolation(
+            message=f"The following columns lack a description: "
+            f"{', '.join(invalid_columns)}."
+        )
+
+    return None
+
+
+@rule(description="A model should have at least one test defined.", hint="Some hint.")
+def has_test(model: Model) -> RuleViolation | None:
+    """A model should have at least one model-level and one column-level test.
+
+    This does not include singular tests, which are tests defined in a separate .sql
+    file and not linked to the model in the metadata.
+    """
+    column_tests = []
+    for column in model.columns.values():
+        column_tests.extend(column.tests)
+
+    if len(model.tests) == 0 or len(column_tests) == 0:
+        return RuleViolation()
+
+    return None
diff --git a/src/dbt_score/utils.py b/src/dbt_score/utils.py
@@ -0,0 +1,20 @@
+"""Utility functions."""
+
+import json
+from pathlib import Path
+from typing import Any
+
+
+class JsonOpenError(RuntimeError):
+    """Raised when there is an error opening a JSON file.
+
+    `get_json` raises this for any failure while reading or parsing the file,
+    with the original exception chained as the cause.
+    """
+
+    pass
+
+
+def get_json(json_filename: str) -> Any:
+    """Get JSON from a file."""
+    try:
+        file_content = Path(json_filename).read_text(encoding="utf-8")
+        return json.loads(file_content)
+    except Exception as e:
+        raise JsonOpenError(f"Error opening {json_filename}.") from e