2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to

## [Unreleased]

- Add support for linting and scoring dbt seeds (#110)

## [0.11.0] - 2025-04-04

- Improve documentation on rule filters. (#93)
3 changes: 2 additions & 1 deletion src/dbt_score/__init__.py
@@ -1,13 +1,14 @@
"""Init dbt_score package."""

from dbt_score.models import Model, Snapshot, Source
from dbt_score.models import Model, Seed, Snapshot, Source
from dbt_score.rule import Rule, RuleViolation, Severity, rule
from dbt_score.rule_filter import RuleFilter, rule_filter

__all__ = [
"Model",
"Source",
"Snapshot",
"Seed",
"RuleFilter",
"Rule",
"RuleViolation",
11 changes: 10 additions & 1 deletion src/dbt_score/dbt_utils.py
@@ -69,7 +69,16 @@ def dbt_parse() -> "dbtRunnerResult":
@dbt_required
def dbt_ls(select: Iterable[str] | None) -> Iterable[str]:
"""Run dbt ls."""
cmd = ["ls", "--resource-types", "model", "source", "snapshot", "--output", "name"]
cmd = [
"ls",
"--resource-types",
"model",
"source",
"snapshot",
"seed",
"--output",
"name",
]
if select:
cmd += ["--select", *select]

2 changes: 2 additions & 0 deletions src/dbt_score/evaluation.py
@@ -64,6 +64,7 @@ def evaluate(self) -> None:
self._manifest_loader.models,
self._manifest_loader.sources,
self._manifest_loader.snapshots,
self._manifest_loader.seeds,
):
# type inference on elements from `chain` is wonky
# and resolves to superclass HasColumnsMixin
@@ -97,5 +98,6 @@ def evaluate(self) -> None:
self._manifest_loader.models
or self._manifest_loader.sources
or self._manifest_loader.snapshots
or self._manifest_loader.seeds
):
self._formatter.project_evaluated(self.project_score)
4 changes: 3 additions & 1 deletion src/dbt_score/formatters/human_readable_formatter.py
@@ -4,7 +4,7 @@

from dbt_score.evaluation import EvaluableResultsType
from dbt_score.formatters import Formatter
from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.rule import RuleViolation
from dbt_score.scoring import Score

@@ -37,6 +37,8 @@ def pretty_name(evaluable: Evaluable) -> str:
return evaluable.selector_name
case Snapshot():
return evaluable.name
case Seed():
return evaluable.name
case _:
raise NotImplementedError

100 changes: 95 additions & 5 deletions src/dbt_score/models.py
@@ -482,11 +482,89 @@ def __hash__(self) -> int:
return hash(self.unique_id)


Evaluable: TypeAlias = Model | Source | Snapshot
@dataclass
class Seed(HasColumnsMixin):
"""Represents a dbt seed.

Attributes:
unique_id: The id of the seed, e.g. `seed.package.seed_name`.
name: The name of the seed.
relation_name: The relation name of the seed, e.g. `db.schema.seed_name`.
description: The full description of the seed.
original_file_path: The file path of the seed CSV.
config: The config of the seed.
meta: The meta of the seed.
columns: The list of columns of the seed.
package_name: The package name of the seed.
database: The database name of the seed.
schema: The schema name of the seed.
alias: The alias of the seed.
patch_path: The yml path of the seed.
tags: The list of tags attached to the seed.
tests: The list of tests attached to the seed.
_raw_values: The raw values of the seed (node) in the manifest.
_raw_test_values: The raw test values of the seed (node) in the manifest.
"""

unique_id: str
name: str
relation_name: str
description: str
original_file_path: str
config: dict[str, Any]
meta: dict[str, Any]
columns: list[Column]
package_name: str
database: str
schema: str
alias: str | None = None
patch_path: str | None = None
tags: list[str] = field(default_factory=list)
tests: list[Test] = field(default_factory=list)
_raw_values: dict[str, Any] = field(default_factory=dict)
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)

@classmethod
def from_node(
cls, node_values: dict[str, Any], test_values: list[dict[str, Any]]
) -> "Seed":
"""Create a seed object from a node and its tests in the manifest."""
return cls(
unique_id=node_values["unique_id"],
name=node_values["name"],
relation_name=node_values["relation_name"],
description=node_values["description"],
original_file_path=node_values["original_file_path"],
config=node_values["config"],
meta=node_values["meta"],
columns=cls._get_columns(node_values, test_values),
package_name=node_values["package_name"],
database=node_values["database"],
schema=node_values["schema"],
alias=node_values["alias"],
patch_path=node_values["patch_path"],
tags=node_values["tags"],
tests=[
Test.from_node(test)
for test in test_values
if not test.get("test_metadata", {})
.get("kwargs", {})
.get("column_name")
],
_raw_values=node_values,
_raw_test_values=test_values,
)

def __hash__(self) -> int:
"""Compute a unique hash for a seed."""
return hash(self.unique_id)


Evaluable: TypeAlias = Model | Source | Snapshot | Seed


class ManifestLoader:
"""Load the models, sources, snapshots and tests from the manifest."""
"""Load the models, sources, snapshots, seeds and tests from the manifest."""

def __init__(self, file_path: Path, select: Iterable[str] | None = None):
"""Initialize the ManifestLoader.
@@ -512,16 +590,20 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None):
self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list)
self.sources: list[Source] = []
self.snapshots: list[Snapshot] = []
self.seeds: list[Seed] = []

self._reindex_tests()
self._load_models()
self._load_sources()
self._load_snapshots()
self._load_seeds()

if select:
self._filter_evaluables(select)

if (len(self.models) + len(self.sources) + len(self.snapshots)) == 0:
if (
len(self.models) + len(self.sources) + len(self.snapshots) + len(self.seeds)
) == 0:
logger.warning("Nothing to evaluate!")

def _load_models(self) -> None:
Expand All @@ -545,6 +627,13 @@ def _load_snapshots(self) -> None:
snapshot = Snapshot.from_node(node_values, self.tests.get(node_id, []))
self.snapshots.append(snapshot)

def _load_seeds(self) -> None:
"""Load the seeds from the manifest."""
for node_id, node_values in self.raw_nodes.items():
if node_values.get("resource_type") == "seed":
seed = Seed.from_node(node_values, self.tests.get(node_id, []))
self.seeds.append(seed)

def _reindex_tests(self) -> None:
"""Index tests based on their associated evaluable."""
for node_values in self.raw_nodes.values():
@@ -566,8 +655,8 @@ def _filter_evaluables(self, select: Iterable[str]) -> None:
single_model_select = re.compile(r"[a-zA-Z0-9_]+")

if all(single_model_select.fullmatch(x) for x in select):
# Using '--select my_model' is a common case, which can easily be sped up by
# not invoking dbt
# Using '--select my_model' is a common case, which
# can easily be sped up by not invoking dbt
selected = select
else:
# Use dbt's implementation of --select
@@ -576,3 +665,4 @@ def _filter_evaluables(self, select: Iterable[str]) -> None:
self.models = [m for m in self.models if m.name in selected]
self.sources = [s for s in self.sources if s.selector_name in selected]
self.snapshots = [s for s in self.snapshots if s.name in selected]
self.seeds = [s for s in self.seeds if s.name in selected]
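
For reference, the new dataclass can also be constructed directly, for example in a test fixture. Below is a minimal sketch with hypothetical values; in normal operation ManifestLoader._load_seeds builds Seed objects from manifest nodes whose resource_type is "seed".

from dbt_score.models import Seed

# Hypothetical seed, built by hand instead of via Seed.from_node.
# Only the required fields are supplied; alias, patch_path, tags, tests
# and the raw manifest values fall back to their defaults.
country_codes = Seed(
    unique_id="seed.my_package.country_codes",
    name="country_codes",
    relation_name='"db"."main"."country_codes"',
    description="ISO 3166-1 country codes.",
    original_file_path="seeds/country_codes.csv",
    config={"enabled": True},
    meta={"owner": "data-team"},
    columns=[],
    package_name="my_package",
    database="db",
    schema="main",
)

# Seeds hash on their unique_id, so they can be collected in sets together
# with models, sources and snapshots via the widened Evaluable union.
assert hash(country_codes) == hash(country_codes.unique_id)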
13 changes: 11 additions & 2 deletions src/dbt_score/rule.py
@@ -14,7 +14,7 @@
overload,
)

from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.more_itertools import first_true
from dbt_score.rule_filter import RuleFilter

@@ -66,8 +66,12 @@ class RuleViolation:
ModelRuleEvaluationType: TypeAlias = Callable[[Model], RuleViolation | None]
SourceRuleEvaluationType: TypeAlias = Callable[[Source], RuleViolation | None]
SnapshotRuleEvaluationType: TypeAlias = Callable[[Snapshot], RuleViolation | None]
SeedRuleEvaluationType: TypeAlias = Callable[[Seed], RuleViolation | None]
RuleEvaluationType: TypeAlias = (
ModelRuleEvaluationType | SourceRuleEvaluationType | SnapshotRuleEvaluationType
ModelRuleEvaluationType
| SourceRuleEvaluationType
| SnapshotRuleEvaluationType
| SeedRuleEvaluationType
)


@@ -206,6 +210,11 @@ def rule(__func: SnapshotRuleEvaluationType) -> Type[Rule]:
...


@overload
def rule(__func: SeedRuleEvaluationType) -> Type[Rule]:
...


@overload
def rule(
*,
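
With the new overload in place, projects can register custom rules that take a Seed directly, alongside the generic seed rules added in rules/generic.py below. A minimal sketch, assuming a hypothetical project convention that seed CSVs live under a seeds/ directory:

from dbt_score import RuleViolation, Seed, rule


@rule
def seed_lives_in_seeds_directory(seed: Seed) -> RuleViolation | None:
    """A seed CSV should live under the seeds/ directory (hypothetical convention)."""
    if not seed.original_file_path.startswith("seeds/"):
        return RuleViolation(
            message=f"Seed file is outside seeds/: {seed.original_file_path}."
        )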
9 changes: 8 additions & 1 deletion src/dbt_score/rule_filter.py
@@ -4,16 +4,18 @@
import typing
from typing import Any, Callable, Type, TypeAlias, cast, overload

from dbt_score.models import Evaluable, Model, Snapshot, Source
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
from dbt_score.more_itertools import first_true

ModelFilterEvaluationType: TypeAlias = Callable[[Model], bool]
SourceFilterEvaluationType: TypeAlias = Callable[[Source], bool]
SnapshotFilterEvaluationType: TypeAlias = Callable[[Snapshot], bool]
SeedRuleEvaluationType: TypeAlias = Callable[[Seed], bool]
FilterEvaluationType: TypeAlias = (
ModelFilterEvaluationType
| SourceFilterEvaluationType
| SnapshotFilterEvaluationType
| SeedRuleEvaluationType
)


@@ -87,6 +89,11 @@ def rule_filter(__func: SnapshotFilterEvaluationType) -> Type[RuleFilter]:
...


@overload
def rule_filter(__func: SeedRuleEvaluationType) -> Type[RuleFilter]:
...


@overload
def rule_filter(
*,
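
The matching rule_filter overload means filters can key on seeds as well. A small sketch, assuming (as with the existing model filters) that returning True keeps the seed in scope for the rules the filter is attached to; "vendored_package" is a hypothetical package name:

from dbt_score import Seed, rule_filter


@rule_filter
def skip_vendored_seeds(seed: Seed) -> bool:
    """Only evaluate seeds defined outside the hypothetical vendored_package."""
    return seed.package_name != "vendored_package"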
38 changes: 37 additions & 1 deletion src/dbt_score/rules/generic.py
@@ -1,8 +1,10 @@
"""All generic rules."""

from dbt_score import Model, RuleViolation, Severity, Snapshot, rule
from dbt_score import Model, RuleViolation, Seed, Severity, Snapshot, rule
from dbt_score.rules.filters import is_table

MAX_DESCRIPTION_MESSAGE_LENGTH = 60


@rule
def snapshot_has_unique_key(snapshot: Snapshot) -> RuleViolation | None:
@@ -134,3 +136,37 @@ def has_no_unused_is_incremental(model: Model) -> RuleViolation | None:
and "is_incremental()" in model.raw_code
):
return RuleViolation("Non-incremental model makes use of is_incremental().")


@rule
def seed_has_description(seed: Seed) -> RuleViolation | None:
"""A seed should have a description."""
if not seed.description:
return RuleViolation(message="Seed lacks a description.")


@rule
def seed_columns_have_description(seed: Seed) -> RuleViolation | None:
"""All columns of a seed should have a description."""
invalid_column_names = [
column.name for column in seed.columns if not column.description
]
if invalid_column_names:
message = f"Columns lack a description: {', '.join(invalid_column_names)}."
if len(message) > MAX_DESCRIPTION_MESSAGE_LENGTH:
message = f"{message[:MAX_DESCRIPTION_MESSAGE_LENGTH]}…"
return RuleViolation(message=message)


@rule(severity=Severity.LOW)
def seed_has_tests(seed: Seed) -> RuleViolation | None:
"""A seed should have at least one test."""
if not seed.tests and not any(column.tests for column in seed.columns):
return RuleViolation(message="Seed has no tests.")


@rule
def seed_has_owner(seed: Seed) -> RuleViolation | None:
"""A seed should have an owner."""
if not seed.meta.get("owner"):
return RuleViolation(message="Seed lacks an owner.")