Skip to content

Commit cb57712

Browse files
Add support for linting and scoring dbt seeds (#110)
## Overview

Add support for linting and scoring seed resources in dbt-score, following issue [#105](#105).

## Problem

Previously, dbt-score only supported linting models, sources, and snapshots. Seeds were not evaluated, creating an inconsistency in the quality assessment of a dbt project's metadata. Since seeds often contain important reference data, ensuring they have proper documentation and ownership is valuable.

## Implementation

- Added `Seed` class to represent dbt seeds
- Updated `ManifestLoader` to load seeds from the manifest
- Added seed-specific linting rules (description, columns, tests, ownership)
- Updated `Evaluation` class to include seeds in evaluation chain
- Modified formatters to handle and display seed results
- Added comprehensive tests for seed support

## New Rules

- `seed_has_description` - Ensures seeds have descriptive documentation
- `seed_columns_have_description` - Verifies seed columns are documented
- `seed_has_tests` - Checks that seeds have appropriate tests
- `seed_has_owner` - Ensures seeds have defined ownership

## Testing

Full test coverage has been added for seed support.

- Fixtures for seeds in test suite
- Tests for seed-specific rules
- Updates to existing tests to accommodate seeds

---------

Co-authored-by: Jochem van Dooren <[email protected]>
1 parent edf0563 commit cb57712

File tree

17 files changed

+421
-40
lines changed

17 files changed

+421
-40
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to
88

99
## [Unreleased]
1010

11+
- Add support for linting and scoring dbt seeds (#110)
1112
- Add `parents` to models and snapshots, allowing access to parent nodes. (#109)
1213

1314
## [0.11.0] - 2025-04-04

pdm.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/dbt_score/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
"""Init dbt_score package."""
22

3-
from dbt_score.models import Model, Snapshot, Source
3+
from dbt_score.models import Model, Seed, Snapshot, Source
44
from dbt_score.rule import Rule, RuleViolation, Severity, rule
55
from dbt_score.rule_filter import RuleFilter, rule_filter
66

77
__all__ = [
88
"Model",
99
"Source",
1010
"Snapshot",
11+
"Seed",
1112
"RuleFilter",
1213
"Rule",
1314
"RuleViolation",

src/dbt_score/dbt_utils.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,16 @@ def dbt_parse() -> "dbtRunnerResult":
6969
@dbt_required
7070
def dbt_ls(select: Iterable[str] | None) -> Iterable[str]:
7171
"""Run dbt ls."""
72-
cmd = ["ls", "--resource-types", "model", "source", "snapshot", "--output", "name"]
72+
cmd = [
73+
"ls",
74+
"--resource-types",
75+
"model",
76+
"source",
77+
"snapshot",
78+
"seed",
79+
"--output",
80+
"name",
81+
]
7382
if select:
7483
cmd += ["--select", *select]
7584

src/dbt_score/evaluation.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def evaluate(self) -> None:
6464
self._manifest_loader.models.values(),
6565
self._manifest_loader.sources.values(),
6666
self._manifest_loader.snapshots.values(),
67+
self._manifest_loader.seeds.values(),
6768
):
6869
# type inference on elements from `chain` is wonky
6970
# and resolves to superclass HasColumnsMixin
@@ -97,5 +98,6 @@ def evaluate(self) -> None:
9798
self._manifest_loader.models
9899
or self._manifest_loader.sources
99100
or self._manifest_loader.snapshots
101+
or self._manifest_loader.seeds
100102
):
101103
self._formatter.project_evaluated(self.project_score)

src/dbt_score/formatters/human_readable_formatter.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from dbt_score.evaluation import EvaluableResultsType
66
from dbt_score.formatters import Formatter
7-
from dbt_score.models import Evaluable, Model, Snapshot, Source
7+
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
88
from dbt_score.rule import RuleViolation
99
from dbt_score.scoring import Score
1010

@@ -37,6 +37,8 @@ def pretty_name(evaluable: Evaluable) -> str:
3737
return evaluable.selector_name
3838
case Snapshot():
3939
return evaluable.name
40+
case Seed():
41+
return evaluable.name
4042
case _:
4143
raise NotImplementedError
4244

src/dbt_score/models.py

Lines changed: 104 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from collections import defaultdict
77
from dataclasses import dataclass, field
88
from pathlib import Path
9-
from typing import Any, Iterable, Literal, TypeAlias, Union
9+
from typing import Any, Iterable, List, Literal, TypeAlias, Union
1010

1111
from dbt_score.dbt_utils import dbt_ls
1212

@@ -154,6 +154,10 @@ def _get_columns(
154154
]
155155

156156

157+
# Type annotation for parent references
158+
ParentType = Union["Model", "Source", "Snapshot", "Seed"]
159+
160+
157161
@dataclass
158162
class Model(HasColumnsMixin):
159163
"""Represents a dbt model.
@@ -205,7 +209,7 @@ class Model(HasColumnsMixin):
205209
tests: list[Test] = field(default_factory=list)
206210
depends_on: dict[str, list[str]] = field(default_factory=dict)
207211
constraints: list[Constraint] = field(default_factory=list)
208-
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
212+
parents: List[ParentType] = field(default_factory=list)
209213
_raw_values: dict[str, Any] = field(default_factory=dict)
210214
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)
211215

@@ -245,6 +249,7 @@ def from_node(
245249
Constraint.from_raw_values(constraint)
246250
for constraint in node_values["constraints"]
247251
],
252+
parents=[], # Will be populated later
248253
_raw_values=node_values,
249254
_raw_test_values=test_values,
250255
)
@@ -443,7 +448,7 @@ class Snapshot(HasColumnsMixin):
443448
depends_on: dict[str, list[str]] = field(default_factory=dict)
444449
strategy: str | None = None
445450
unique_key: list[str] | None = None
446-
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
451+
parents: List[ParentType] = field(default_factory=list)
447452
_raw_values: dict[str, Any] = field(default_factory=dict)
448453
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)
449454

@@ -477,6 +482,7 @@ def from_node(
477482
.get("column_name")
478483
],
479484
depends_on=node_values["depends_on"],
485+
parents=[], # Will be populated later
480486
_raw_values=node_values,
481487
_raw_test_values=test_values,
482488
)
@@ -486,11 +492,89 @@ def __hash__(self) -> int:
486492
return hash(self.unique_id)
487493

488494

489-
Evaluable: TypeAlias = Model | Source | Snapshot
495+
@dataclass
class Seed(HasColumnsMixin):
    """A dbt seed, built from its node in the manifest.

    Attributes:
        unique_id: Identifier of the seed, e.g. `seed.package.seed_name`.
        name: Name of the seed.
        relation_name: Relation name of the seed, e.g. `db.schema.seed_name`.
        description: Full description of the seed.
        original_file_path: Path of the seed file, e.g. `data/seed_name.csv`.
        config: Config of the seed.
        meta: Meta of the seed.
        columns: Columns of the seed.
        package_name: Name of the package the seed belongs to.
        database: Database name of the seed.
        schema: Schema name of the seed.
        alias: Alias of the seed.
        patch_path: Path of the yml file describing the seed, e.g. `seeds.yml`.
        tags: Tags attached to the seed.
        tests: Tests attached to the seed itself.
        _raw_values: Raw values of the seed (node) in the manifest.
        _raw_test_values: Raw test values of the seed (node) in the manifest.
    """

    unique_id: str
    name: str
    relation_name: str
    description: str
    original_file_path: str
    config: dict[str, Any]
    meta: dict[str, Any]
    columns: list[Column]
    package_name: str
    database: str
    schema: str
    alias: str | None = None
    patch_path: str | None = None
    tags: list[str] = field(default_factory=list)
    tests: list[Test] = field(default_factory=list)
    _raw_values: dict[str, Any] = field(default_factory=dict)
    _raw_test_values: list[dict[str, Any]] = field(default_factory=list)

    @classmethod
    def from_node(
        cls, node_values: dict[str, Any], test_values: list[dict[str, Any]]
    ) -> "Seed":
        """Build a seed from its manifest node and the raw tests tied to it.

        Args:
            node_values: Raw manifest entry of the seed node.
            test_values: Raw manifest entries of the tests tied to the seed.

        Returns:
            The seed populated from `node_values`; `tests` only holds the
            tests that do not declare a `column_name` kwarg.
        """

        def targets_seed(raw_test: dict[str, Any]) -> bool:
            # A test declaring a `column_name` kwarg is not kept as a
            # seed-level test.
            test_kwargs = raw_test.get("test_metadata", {}).get("kwargs", {})
            return not test_kwargs.get("column_name")

        return cls(
            unique_id=node_values["unique_id"],
            name=node_values["name"],
            relation_name=node_values["relation_name"],
            description=node_values["description"],
            original_file_path=node_values["original_file_path"],
            config=node_values["config"],
            meta=node_values["meta"],
            columns=cls._get_columns(node_values, test_values),
            package_name=node_values["package_name"],
            database=node_values["database"],
            schema=node_values["schema"],
            alias=node_values["alias"],
            patch_path=node_values["patch_path"],
            tags=node_values["tags"],
            tests=[
                Test.from_node(raw_test)
                for raw_test in test_values
                if targets_seed(raw_test)
            ],
            _raw_values=node_values,
            _raw_test_values=test_values,
        )

    def __hash__(self) -> int:
        """Hash a seed on its unique id."""
        return hash(self.unique_id)
571+
572+
573+
Evaluable: TypeAlias = Model | Source | Snapshot | Seed
490574

491575

492576
class ManifestLoader:
493-
"""Load the models, sources, snapshots and tests from the manifest."""
577+
"""Load the models, sources, snapshots, seeds and tests from the manifest."""
494578

495579
def __init__(self, file_path: Path, select: Iterable[str] | None = None):
496580
"""Initialize the ManifestLoader.
@@ -516,17 +600,21 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None):
516600
self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list)
517601
self.sources: dict[str, Source] = {}
518602
self.snapshots: dict[str, Snapshot] = {}
603+
self.seeds: dict[str, Seed] = {}
519604

520605
self._reindex_tests()
521606
self._load_models()
522607
self._load_sources()
523608
self._load_snapshots()
609+
self._load_seeds()
524610
self._populate_parents()
525611

526612
if select:
527613
self._filter_evaluables(select)
528614

529-
if (len(self.models) + len(self.sources) + len(self.snapshots)) == 0:
615+
if (
616+
len(self.models) + len(self.sources) + len(self.snapshots) + len(self.seeds)
617+
) == 0:
530618
logger.warning("Nothing to evaluate!")
531619

532620
def _load_models(self) -> None:
@@ -550,6 +638,13 @@ def _load_snapshots(self) -> None:
550638
snapshot = Snapshot.from_node(node_values, self.tests.get(node_id, []))
551639
self.snapshots[node_id] = snapshot
552640

641+
def _load_seeds(self) -> None:
    """Collect every manifest node of resource type `seed` into `self.seeds`.

    Each seed is built together with the tests indexed under its node id,
    or with no tests when none are indexed for it.
    """
    seed_nodes = (
        (node_id, raw_node)
        for node_id, raw_node in self.raw_nodes.items()
        if raw_node.get("resource_type") == "seed"
    )
    for node_id, raw_node in seed_nodes:
        self.seeds[node_id] = Seed.from_node(raw_node, self.tests.get(node_id, []))
647+
553648
def _reindex_tests(self) -> None:
554649
"""Index tests based on their associated evaluable."""
555650
for node_values in self.raw_nodes.values():
@@ -576,6 +671,8 @@ def _populate_parents(self) -> None:
576671
node.parents.append(self.snapshots[parent_id])
577672
elif parent_id in self.sources:
578673
node.parents.append(self.sources[parent_id])
674+
elif parent_id in self.seeds:
675+
node.parents.append(self.seeds[parent_id])
579676

580677
def _filter_evaluables(self, select: Iterable[str]) -> None:
581678
"""Filter evaluables like dbt's --select."""
@@ -594,3 +691,4 @@ def _filter_evaluables(self, select: Iterable[str]) -> None:
594691
k: s for k, s in self.sources.items() if s.selector_name in selected
595692
}
596693
self.snapshots = {k: s for k, s in self.snapshots.items() if s.name in selected}
694+
self.seeds = {k: s for k, s in self.seeds.items() if s.name in selected}

src/dbt_score/rule.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
overload,
1515
)
1616

17-
from dbt_score.models import Evaluable, Model, Snapshot, Source
17+
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
1818
from dbt_score.more_itertools import first_true
1919
from dbt_score.rule_filter import RuleFilter
2020

@@ -66,8 +66,12 @@ class RuleViolation:
6666
ModelRuleEvaluationType: TypeAlias = Callable[[Model], RuleViolation | None]
6767
SourceRuleEvaluationType: TypeAlias = Callable[[Source], RuleViolation | None]
6868
SnapshotRuleEvaluationType: TypeAlias = Callable[[Snapshot], RuleViolation | None]
69+
SeedRuleEvaluationType: TypeAlias = Callable[[Seed], RuleViolation | None]
6970
RuleEvaluationType: TypeAlias = (
70-
ModelRuleEvaluationType | SourceRuleEvaluationType | SnapshotRuleEvaluationType
71+
ModelRuleEvaluationType
72+
| SourceRuleEvaluationType
73+
| SnapshotRuleEvaluationType
74+
| SeedRuleEvaluationType
7175
)
7276

7377

@@ -206,6 +210,11 @@ def rule(__func: SnapshotRuleEvaluationType) -> Type[Rule]:
206210
...
207211

208212

213+
@overload
214+
def rule(__func: SeedRuleEvaluationType) -> Type[Rule]:
215+
...
216+
217+
209218
@overload
210219
def rule(
211220
*,

src/dbt_score/rule_filter.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,18 @@
44
import typing
55
from typing import Any, Callable, Type, TypeAlias, cast, overload
66

7-
from dbt_score.models import Evaluable, Model, Snapshot, Source
7+
from dbt_score.models import Evaluable, Model, Seed, Snapshot, Source
88
from dbt_score.more_itertools import first_true
99

1010
ModelFilterEvaluationType: TypeAlias = Callable[[Model], bool]
1111
SourceFilterEvaluationType: TypeAlias = Callable[[Source], bool]
1212
SnapshotFilterEvaluationType: TypeAlias = Callable[[Snapshot], bool]
13+
SeedRuleEvaluationType: TypeAlias = Callable[[Seed], bool]
1314
FilterEvaluationType: TypeAlias = (
1415
ModelFilterEvaluationType
1516
| SourceFilterEvaluationType
1617
| SnapshotFilterEvaluationType
18+
| SeedRuleEvaluationType
1719
)
1820

1921

@@ -87,6 +89,11 @@ def rule_filter(__func: SnapshotFilterEvaluationType) -> Type[RuleFilter]:
8789
...
8890

8991

92+
@overload
93+
def rule_filter(__func: SeedRuleEvaluationType) -> Type[RuleFilter]:
94+
...
95+
96+
9097
@overload
9198
def rule_filter(
9299
*,

src/dbt_score/rules/generic.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""All generic rules."""
22

3-
from dbt_score import Model, RuleViolation, Severity, Snapshot, rule
3+
from dbt_score import Model, RuleViolation, Seed, Severity, Snapshot, rule
44
from dbt_score.rules.filters import is_table
55

66

@@ -35,7 +35,7 @@ def columns_have_description(model: Model) -> RuleViolation | None:
3535
max_length = 60
3636
message = f"Columns lack a description: {', '.join(invalid_column_names)}."
3737
if len(message) > max_length:
38-
message = f"{message[:60]}…"
38+
message = f"{message[:max_length]}…"
3939
return RuleViolation(message=message)
4040

4141

@@ -134,3 +134,32 @@ def has_no_unused_is_incremental(model: Model) -> RuleViolation | None:
134134
and "is_incremental()" in model.raw_code
135135
):
136136
return RuleViolation("Non-incremental model makes use of is_incremental().")
137+
138+
139+
@rule
def seed_has_description(seed: Seed) -> RuleViolation | None:
    """A seed should have a description."""
    # Any non-empty description satisfies the rule.
    if seed.description:
        return None
    return RuleViolation(message="Seed lacks a description.")
144+
145+
146+
@rule
def seed_columns_have_description(seed: Seed) -> RuleViolation | None:
    """All columns of a seed should have a description."""
    undocumented = [col.name for col in seed.columns if not col.description]
    if not undocumented:
        return None

    # Cap the message at `max_length` characters and mark the cut with an
    # ellipsis, so a seed with many undocumented columns stays readable.
    max_length = 60
    message = f"Columns lack a description: {', '.join(undocumented)}."
    if len(message) > max_length:
        message = message[:max_length] + "…"
    return RuleViolation(message=message)
158+
159+
160+
@rule
def seed_has_owner(seed: Seed) -> RuleViolation | None:
    """A seed should have an owner."""
    # The owner is read from the `meta` entry of the seed's config.
    owner = seed.config.get("meta", {}).get("owner")
    if owner:
        return None
    return RuleViolation(message="Seed lacks an owner.")

0 commit comments

Comments
 (0)