Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to

## [Unreleased]

- Add support for linting and scoring dbt seeds. (#110)
- Add `parents` to models and snapshots, allowing access to parent nodes. (#109)

## [0.11.0] - 2025-04-04
Expand Down
2 changes: 1 addition & 1 deletion pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion src/dbt_score/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Init dbt_score package."""

from dbt_score.models import Model, Snapshot, Source
from dbt_score.models import Model, Seed, Snapshot, Source
from dbt_score.rule import Rule, RuleViolation, Severity, rule
from dbt_score.rule_filter import RuleFilter, rule_filter

__all__ = [
"Model",
"Source",
"Snapshot",
"Seed",
"RuleFilter",
"Rule",
"RuleViolation",
Expand Down
11 changes: 10 additions & 1 deletion src/dbt_score/dbt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,16 @@ def dbt_parse() -> "dbtRunnerResult":
@dbt_required
def dbt_ls(select: Iterable[str] | None) -> Iterable[str]:
"""Run dbt ls."""
cmd = ["ls", "--resource-types", "model", "source", "snapshot", "--output", "name"]
cmd = [
"ls",
"--resource-types",
"model",
"source",
"snapshot",
"seed",
"--output",
"name",
]
if select:
cmd += ["--select", *select]

Expand Down
2 changes: 2 additions & 0 deletions src/dbt_score/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def evaluate(self) -> None:
self._manifest_loader.models.values(),
self._manifest_loader.sources.values(),
self._manifest_loader.snapshots.values(),
self._manifest_loader.seeds.values(),
):
# type inference on elements from `chain` is wonky
# and resolves to superclass HasColumnsMixin
Expand Down Expand Up @@ -97,5 +98,6 @@ def evaluate(self) -> None:
self._manifest_loader.models
or self._manifest_loader.sources
or self._manifest_loader.snapshots
or self._manifest_loader.seeds
):
self._formatter.project_evaluated(self.project_score)
4 changes: 3 additions & 1 deletion src/dbt_score/formatters/human_readable_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dbt_score.evaluation import EvaluableResultsType
from dbt_score.formatters import Formatter
from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.rule import RuleViolation
from dbt_score.scoring import Score

Expand Down Expand Up @@ -37,6 +37,8 @@ def pretty_name(evaluable: Evaluable) -> str:
return evaluable.selector_name
case Snapshot():
return evaluable.name
case Seed():
return evaluable.name
case _:
raise NotImplementedError

Expand Down
110 changes: 104 additions & 6 deletions src/dbt_score/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from collections import defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Iterable, Literal, TypeAlias, Union
from typing import Any, Iterable, List, Literal, TypeAlias, Union

from dbt_score.dbt_utils import dbt_ls

Expand Down Expand Up @@ -154,6 +154,10 @@ def _get_columns(
]


# Type annotation for parent references
ParentType = Union["Model", "Source", "Snapshot", "Seed"]


@dataclass
class Model(HasColumnsMixin):
"""Represents a dbt model.
Expand Down Expand Up @@ -205,7 +209,7 @@ class Model(HasColumnsMixin):
tests: list[Test] = field(default_factory=list)
depends_on: dict[str, list[str]] = field(default_factory=dict)
constraints: list[Constraint] = field(default_factory=list)
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
parents: List[ParentType] = field(default_factory=list)
_raw_values: dict[str, Any] = field(default_factory=dict)
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)

Expand Down Expand Up @@ -245,6 +249,7 @@ def from_node(
Constraint.from_raw_values(constraint)
for constraint in node_values["constraints"]
],
parents=[], # Will be populated later
_raw_values=node_values,
_raw_test_values=test_values,
)
Expand Down Expand Up @@ -443,7 +448,7 @@ class Snapshot(HasColumnsMixin):
depends_on: dict[str, list[str]] = field(default_factory=dict)
strategy: str | None = None
unique_key: list[str] | None = None
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
parents: List[ParentType] = field(default_factory=list)
_raw_values: dict[str, Any] = field(default_factory=dict)
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)

Expand Down Expand Up @@ -477,6 +482,7 @@ def from_node(
.get("column_name")
],
depends_on=node_values["depends_on"],
parents=[], # Will be populated later
_raw_values=node_values,
_raw_test_values=test_values,
)
Expand All @@ -486,11 +492,89 @@ def __hash__(self) -> int:
return hash(self.unique_id)


Evaluable: TypeAlias = Model | Source | Snapshot
@dataclass
class Seed(HasColumnsMixin):
    """In-memory representation of a single dbt seed from the manifest.

    Attributes:
        unique_id: Identifier of the seed, e.g. `seed.package.seed_name`.
        name: Name of the seed.
        relation_name: Relation name of the seed, e.g. `db.schema.seed_name`.
        description: Full description text of the seed.
        original_file_path: Path of the seed file, e.g. `data/seed_name.csv`.
        config: Config of the seed.
        meta: Meta of the seed.
        columns: Columns of the seed.
        package_name: Name of the package the seed belongs to.
        database: Database name of the seed.
        schema: Schema name of the seed.
        alias: Alias of the seed.
        patch_path: Path of the yml file of the seed, e.g. `seeds.yml`.
        tags: Tags attached to the seed.
        tests: Tests attached to the seed.
        _raw_values: Raw manifest values of the seed (node).
        _raw_test_values: Raw manifest values of the tests of the seed (node).
    """

    unique_id: str
    name: str
    relation_name: str
    description: str
    original_file_path: str
    config: dict[str, Any]
    meta: dict[str, Any]
    columns: list[Column]
    package_name: str
    database: str
    schema: str
    alias: str | None = None
    patch_path: str | None = None
    tags: list[str] = field(default_factory=list)
    tests: list[Test] = field(default_factory=list)
    _raw_values: dict[str, Any] = field(default_factory=dict)
    _raw_test_values: list[dict[str, Any]] = field(default_factory=list)

    @classmethod
    def from_node(
        cls, node_values: dict[str, Any], test_values: list[dict[str, Any]]
    ) -> "Seed":
        """Build a seed from its manifest node and the tests attached to it."""

        def names_a_column(raw_test: dict[str, Any]) -> bool:
            # A truthy `column_name` kwarg in the test metadata keeps the test
            # out of the seed-level `tests` list built below.
            metadata = raw_test.get("test_metadata", {})
            return bool(metadata.get("kwargs", {}).get("column_name"))

        return cls(
            unique_id=node_values["unique_id"],
            name=node_values["name"],
            relation_name=node_values["relation_name"],
            description=node_values["description"],
            original_file_path=node_values["original_file_path"],
            config=node_values["config"],
            meta=node_values["meta"],
            columns=cls._get_columns(node_values, test_values),
            package_name=node_values["package_name"],
            database=node_values["database"],
            schema=node_values["schema"],
            alias=node_values["alias"],
            patch_path=node_values["patch_path"],
            tags=node_values["tags"],
            tests=[
                Test.from_node(raw_test)
                for raw_test in test_values
                if not names_a_column(raw_test)
            ],
            _raw_values=node_values,
            _raw_test_values=test_values,
        )

    def __hash__(self) -> int:
        """Hash the seed by its `unique_id`."""
        return hash(self.unique_id)


Evaluable: TypeAlias = Model | Source | Snapshot | Seed


class ManifestLoader:
"""Load the models, sources, snapshots and tests from the manifest."""
"""Load the models, sources, snapshots, seeds and tests from the manifest."""

def __init__(self, file_path: Path, select: Iterable[str] | None = None):
"""Initialize the ManifestLoader.
Expand All @@ -516,17 +600,21 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None):
self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list)
self.sources: dict[str, Source] = {}
self.snapshots: dict[str, Snapshot] = {}
self.seeds: dict[str, Seed] = {}

self._reindex_tests()
self._load_models()
self._load_sources()
self._load_snapshots()
self._load_seeds()
self._populate_parents()

if select:
self._filter_evaluables(select)

if (len(self.models) + len(self.sources) + len(self.snapshots)) == 0:
if (
len(self.models) + len(self.sources) + len(self.snapshots) + len(self.seeds)
) == 0:
logger.warning("Nothing to evaluate!")

def _load_models(self) -> None:
Expand All @@ -550,6 +638,13 @@ def _load_snapshots(self) -> None:
snapshot = Snapshot.from_node(node_values, self.tests.get(node_id, []))
self.snapshots[node_id] = snapshot

def _load_seeds(self) -> None:
    """Register every manifest node whose resource type is `seed`."""
    for unique_id, raw_node in self.raw_nodes.items():
        if raw_node.get("resource_type") != "seed":
            continue
        # Nodes with no entry in `self.tests` are built with an empty test list.
        self.seeds[unique_id] = Seed.from_node(
            raw_node, self.tests.get(unique_id, [])
        )

def _reindex_tests(self) -> None:
"""Index tests based on their associated evaluable."""
for node_values in self.raw_nodes.values():
Expand All @@ -576,6 +671,8 @@ def _populate_parents(self) -> None:
node.parents.append(self.snapshots[parent_id])
elif parent_id in self.sources:
node.parents.append(self.sources[parent_id])
elif parent_id in self.seeds:
node.parents.append(self.seeds[parent_id])

def _filter_evaluables(self, select: Iterable[str]) -> None:
"""Filter evaluables like dbt's --select."""
Expand All @@ -594,3 +691,4 @@ def _filter_evaluables(self, select: Iterable[str]) -> None:
k: s for k, s in self.sources.items() if s.selector_name in selected
}
self.snapshots = {k: s for k, s in self.snapshots.items() if s.name in selected}
self.seeds = {k: s for k, s in self.seeds.items() if s.name in selected}
13 changes: 11 additions & 2 deletions src/dbt_score/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
overload,
)

from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.more_itertools import first_true
from dbt_score.rule_filter import RuleFilter

Expand Down Expand Up @@ -66,8 +66,12 @@ class RuleViolation:
ModelRuleEvaluationType: TypeAlias = Callable[[Model], RuleViolation | None]
SourceRuleEvaluationType: TypeAlias = Callable[[Source], RuleViolation | None]
SnapshotRuleEvaluationType: TypeAlias = Callable[[Snapshot], RuleViolation | None]
SeedRuleEvaluationType: TypeAlias = Callable[[Seed], RuleViolation | None]
RuleEvaluationType: TypeAlias = (
ModelRuleEvaluationType | SourceRuleEvaluationType | SnapshotRuleEvaluationType
ModelRuleEvaluationType
| SourceRuleEvaluationType
| SnapshotRuleEvaluationType
| SeedRuleEvaluationType
)


Expand Down Expand Up @@ -206,6 +210,11 @@ def rule(__func: SnapshotRuleEvaluationType) -> Type[Rule]:
...


# Typing-only signature: `rule` applied directly to a function taking a `Seed`.
@overload
def rule(__func: SeedRuleEvaluationType) -> Type[Rule]:
...


@overload
def rule(
*,
Expand Down
9 changes: 8 additions & 1 deletion src/dbt_score/rule_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
import typing
from typing import Any, Callable, Type, TypeAlias, cast, overload

from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.more_itertools import first_true

# Signatures of the functions a rule filter can wrap: each takes one kind of
# evaluable and returns a bool.
ModelFilterEvaluationType: TypeAlias = Callable[[Model], bool]
SourceFilterEvaluationType: TypeAlias = Callable[[Source], bool]
SnapshotFilterEvaluationType: TypeAlias = Callable[[Snapshot], bool]
SeedFilterEvaluationType: TypeAlias = Callable[[Seed], bool]
# Backward-compatible alias: the seed filter type was first introduced under
# this name, which does not follow the `*FilterEvaluationType` convention used
# by the other aliases. Prefer `SeedFilterEvaluationType` in new code.
SeedRuleEvaluationType: TypeAlias = SeedFilterEvaluationType
FilterEvaluationType: TypeAlias = (
    ModelFilterEvaluationType
    | SourceFilterEvaluationType
    | SnapshotFilterEvaluationType
    | SeedFilterEvaluationType
)


Expand Down Expand Up @@ -87,6 +89,11 @@ def rule_filter(__func: SnapshotFilterEvaluationType) -> Type[RuleFilter]:
...


# Typing-only signature: `rule_filter` applied directly to a function taking a `Seed`.
@overload
def rule_filter(__func: SeedRuleEvaluationType) -> Type[RuleFilter]:
...


@overload
def rule_filter(
*,
Expand Down
33 changes: 31 additions & 2 deletions src/dbt_score/rules/generic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""All generic rules."""

from dbt_score import Model, RuleViolation, Severity, Snapshot, rule
from dbt_score import Model, RuleViolation, Seed, Severity, Snapshot, rule
from dbt_score.rules.filters import is_table


Expand Down Expand Up @@ -35,7 +35,7 @@ def columns_have_description(model: Model) -> RuleViolation | None:
max_length = 60
message = f"Columns lack a description: {', '.join(invalid_column_names)}."
if len(message) > max_length:
message = f"{message[:60]}…"
message = f"{message[:max_length]}…"
return RuleViolation(message=message)


Expand Down Expand Up @@ -134,3 +134,32 @@ def has_no_unused_is_incremental(model: Model) -> RuleViolation | None:
and "is_incremental()" in model.raw_code
):
return RuleViolation("Non-incremental model makes use of is_incremental().")


@rule
def seed_has_description(seed: Seed) -> RuleViolation | None:
    """A seed should have a description."""
    # Any truthy description passes; a falsy one (e.g. empty) is a violation.
    if seed.description:
        return None
    return RuleViolation(message="Seed lacks a description.")


@rule
def seed_columns_have_description(seed: Seed) -> RuleViolation | None:
"""All columns of a seed should have a description."""
invalid_column_names = [
column.name for column in seed.columns if not column.description
]
if invalid_column_names:
max_length = 60
message = f"Columns lack a description: {', '.join(invalid_column_names)}."
if len(message) > max_length:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this redundant, as you can also do f"{message[:60]}…" if the length is lower than 60? It will just show the full string I think

message = f"{message[:max_length]}…"
return RuleViolation(message=message)


@rule
def seed_has_owner(seed: Seed) -> RuleViolation | None:
    """A seed should have an owner."""
    # `config["meta"]` can be absent or explicitly null. Normalise both to an
    # empty mapping so the `owner` lookup cannot fail on `None`.
    meta = seed.config.get("meta") or {}
    if not meta.get("owner"):
        return RuleViolation(message="Seed lacks an owner.")
    return None
Loading