Skip to content

Commit edf0563

Browse files
Embedding parents into models and snapshots (#109)
Following discussion in [this issue](#98) and related draft PRs, taking a simpler approach of adding `parents: list[Model | Source | Snapshot]` to `Model` and `Snapshot` models. This should allow writing rules that compare a node to its parents ("any model with tag `tier_1` may only have parents that also have tag `tier_1`", "any model that does not have the tag `beta` may not have any parents that do have the tag `beta`"), to be able to make assertions about model lineage expectations. One could also traverse the graph in full (upstream) with a recursive rule that walks the graph via `parents`. Tested with a ~3000 model manifest locally, and things seem to work just fine; will do a bit more poking and prodding, though. --------- Co-authored-by: Jochem van Dooren <[email protected]>
1 parent e5bc395 commit edf0563

File tree

6 files changed

+84
-42
lines changed

6 files changed

+84
-42
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ and this project adheres to
88

99
## [Unreleased]
1010

11+
- Add `parents` to models and snapshots, allowing access to parent nodes. (#109)
12+
1113
## [0.11.0] - 2025-04-04
1214

1315
- Improve documentation on rule filters. (#93)

src/dbt_score/evaluation.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,9 @@ def evaluate(self) -> None:
6161
rules = self._rule_registry.rules.values()
6262

6363
for evaluable in chain(
64-
self._manifest_loader.models,
65-
self._manifest_loader.sources,
66-
self._manifest_loader.snapshots,
64+
self._manifest_loader.models.values(),
65+
self._manifest_loader.sources.values(),
66+
self._manifest_loader.snapshots.values(),
6767
):
6868
# type inference on elements from `chain` is wonky
6969
# and resolves to superclass HasColumnsMixin

src/dbt_score/models.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from collections import defaultdict
77
from dataclasses import dataclass, field
88
from pathlib import Path
9-
from typing import Any, Iterable, Literal, TypeAlias
9+
from typing import Any, Iterable, Literal, TypeAlias, Union
1010

1111
from dbt_score.dbt_utils import dbt_ls
1212

@@ -179,6 +179,7 @@ class Model(HasColumnsMixin):
179179
tags: The list of tags attached to the model.
180180
tests: The list of tests attached to the model.
181181
depends_on: Dictionary of models/sources/macros that the model depends on.
182+
parents: The list of models, sources, and snapshots this model depends on.
182183
_raw_values: The raw values of the model (node) in the manifest.
183184
_raw_test_values: The raw test values of the model (node) in the manifest.
184185
"""
@@ -204,6 +205,7 @@ class Model(HasColumnsMixin):
204205
tests: list[Test] = field(default_factory=list)
205206
depends_on: dict[str, list[str]] = field(default_factory=dict)
206207
constraints: list[Constraint] = field(default_factory=list)
208+
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
207209
_raw_values: dict[str, Any] = field(default_factory=dict)
208210
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)
209211

@@ -416,6 +418,7 @@ class Snapshot(HasColumnsMixin):
416418
depends_on: Dictionary of models/sources/macros that the model depends on.
417419
strategy: The strategy of the snapshot.
418420
unique_key: The unique key of the snapshot.
421+
parents: The list of models, sources, and snapshots this snapshot depends on.
419422
_raw_values: The raw values of the snapshot (node) in the manifest.
420423
_raw_test_values: The raw test values of the snapshot (node) in the manifest.
421424
"""
@@ -440,6 +443,7 @@ class Snapshot(HasColumnsMixin):
440443
depends_on: dict[str, list[str]] = field(default_factory=dict)
441444
strategy: str | None = None
442445
unique_key: list[str] | None = None
446+
parents: list[Union["Model", "Source", "Snapshot"]] = field(default_factory=list)
443447
_raw_values: dict[str, Any] = field(default_factory=dict)
444448
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)
445449

@@ -508,15 +512,16 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None):
508512
if source_values["package_name"] == self.project_name
509513
}
510514

511-
self.models: list[Model] = []
515+
self.models: dict[str, Model] = {}
512516
self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list)
513-
self.sources: list[Source] = []
514-
self.snapshots: list[Snapshot] = []
517+
self.sources: dict[str, Source] = {}
518+
self.snapshots: dict[str, Snapshot] = {}
515519

516520
self._reindex_tests()
517521
self._load_models()
518522
self._load_sources()
519523
self._load_snapshots()
524+
self._populate_parents()
520525

521526
if select:
522527
self._filter_evaluables(select)
@@ -529,21 +534,21 @@ def _load_models(self) -> None:
529534
for node_id, node_values in self.raw_nodes.items():
530535
if node_values.get("resource_type") == "model":
531536
model = Model.from_node(node_values, self.tests.get(node_id, []))
532-
self.models.append(model)
537+
self.models[node_id] = model
533538

534539
def _load_sources(self) -> None:
535540
"""Load the sources from the manifest."""
536541
for source_id, source_values in self.raw_sources.items():
537542
if source_values.get("resource_type") == "source":
538543
source = Source.from_node(source_values, self.tests.get(source_id, []))
539-
self.sources.append(source)
544+
self.sources[source_id] = source
540545

541546
def _load_snapshots(self) -> None:
542547
"""Load the snapshots from the manifest."""
543548
for node_id, node_values in self.raw_nodes.items():
544549
if node_values.get("resource_type") == "snapshot":
545550
snapshot = Snapshot.from_node(node_values, self.tests.get(node_id, []))
546-
self.snapshots.append(snapshot)
551+
self.snapshots[node_id] = snapshot
547552

548553
def _reindex_tests(self) -> None:
549554
"""Index tests based on their associated evaluable."""
@@ -561,6 +566,17 @@ def _reindex_tests(self) -> None:
561566
):
562567
self.tests[node_unique_id].append(node_values)
563568

569+
def _populate_parents(self) -> None:
570+
"""Populate `parents` for all models and snapshots."""
571+
for node in list(self.models.values()) + list(self.snapshots.values()):
572+
for parent_id in node.depends_on.get("nodes", []):
573+
if parent_id in self.models:
574+
node.parents.append(self.models[parent_id])
575+
elif parent_id in self.snapshots:
576+
node.parents.append(self.snapshots[parent_id])
577+
elif parent_id in self.sources:
578+
node.parents.append(self.sources[parent_id])
579+
564580
def _filter_evaluables(self, select: Iterable[str]) -> None:
565581
"""Filter evaluables like dbt's --select."""
566582
single_model_select = re.compile(r"[a-zA-Z0-9_]+")
@@ -573,6 +589,8 @@ def _filter_evaluables(self, select: Iterable[str]) -> None:
573589
# Use dbt's implementation of --select
574590
selected = dbt_ls(select)
575591

576-
self.models = [m for m in self.models if m.name in selected]
577-
self.sources = [s for s in self.sources if s.selector_name in selected]
578-
self.snapshots = [s for s in self.snapshots if s.name in selected]
592+
self.models = {k: m for k, m in self.models.items() if m.name in selected}
593+
self.sources = {
594+
k: s for k, s in self.sources.items() if s.selector_name in selected
595+
}
596+
self.snapshots = {k: s for k, s in self.snapshots.items() if s.name in selected}

tests/resources/manifest.json

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
"alias": "snapshot1_alias",
3131
"patch_path": "/path/to/snapshot1.yml",
3232
"tags": [],
33-
"depends_on": {},
33+
"depends_on": { "nodes": ["model.package.model1"] },
3434
"language": "sql",
3535
"access": "protected"
3636
},
@@ -61,7 +61,7 @@
6161
"alias": "snapshot2_alias",
6262
"patch_path": "/path/to/snapshot2.yml",
6363
"tags": [],
64-
"depends_on": {},
64+
"depends_on": { "nodes": ["source.package.my_source.table1"] },
6565
"language": "sql",
6666
"access": "protected"
6767
},
@@ -96,7 +96,13 @@
9696
"alias": "model1_alias",
9797
"patch_path": "/path/to/model1.yml",
9898
"tags": [],
99-
"depends_on": {},
99+
"depends_on": {
100+
"nodes": [
101+
"model.package.model2",
102+
"source.package.my_source.table1",
103+
"snapshot.package.snapshot2"
104+
]
105+
},
100106
"language": "sql",
101107
"access": "protected",
102108
"group": null

tests/test_evaluation.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def test_evaluation_low_medium_high(
4141
)
4242
evaluation.evaluate()
4343

44-
model1 = manifest_loader.models[0]
45-
model2 = manifest_loader.models[1]
44+
model1 = manifest_loader.models["model.package.model1"]
45+
model2 = manifest_loader.models["model.package.model2"]
4646

4747
assert evaluation.results[model1][rule_severity_low] is None
4848
assert evaluation.results[model1][rule_severity_medium] is None
@@ -85,7 +85,7 @@ def test_evaluation_critical(
8585

8686
evaluation.evaluate()
8787

88-
model2 = manifest_loader.models[1]
88+
model2 = manifest_loader.models["model.package.model2"]
8989

9090
assert isinstance(evaluation.results[model2][rule_severity_critical], RuleViolation)
9191

@@ -157,8 +157,8 @@ def test_evaluation_rule_with_config(
157157
):
158158
"""Test rule evaluation with parameters."""
159159
manifest_loader = ManifestLoader(manifest_path)
160-
model1 = manifest_loader.models[0]
161-
model2 = manifest_loader.models[1]
160+
model1 = manifest_loader.models["model.package.model1"]
161+
model2 = manifest_loader.models["model.package.model2"]
162162

163163
config = Config()
164164
config._load_toml_file(str(valid_config_path))
@@ -216,12 +216,12 @@ def test_evaluation_with_filter(
216216
)
217217
evaluation.evaluate()
218218

219-
model1 = manifest_loader.models[0]
220-
model2 = manifest_loader.models[1]
221-
source1 = manifest_loader.sources[0]
222-
source2 = manifest_loader.sources[1]
223-
snapshot1 = manifest_loader.snapshots[0]
224-
snapshot2 = manifest_loader.snapshots[1]
219+
model1 = manifest_loader.models["model.package.model1"]
220+
model2 = manifest_loader.models["model.package.model2"]
221+
source1 = manifest_loader.sources["source.package.my_source.table1"]
222+
source2 = manifest_loader.sources["source.package.my_source.table2"]
223+
snapshot1 = manifest_loader.snapshots["snapshot.package.snapshot1"]
224+
snapshot2 = manifest_loader.snapshots["snapshot.package.snapshot2"]
225225

226226
assert model_rule_with_filter not in evaluation.results[model1]
227227
assert isinstance(evaluation.results[model2][model_rule_with_filter], RuleViolation)
@@ -266,12 +266,12 @@ def test_evaluation_with_class_filter(
266266
)
267267
evaluation.evaluate()
268268

269-
model1 = manifest_loader.models[0]
270-
model2 = manifest_loader.models[1]
271-
source1 = manifest_loader.sources[0]
272-
source2 = manifest_loader.sources[1]
273-
snapshot1 = manifest_loader.snapshots[0]
274-
snapshot2 = manifest_loader.snapshots[1]
269+
model1 = manifest_loader.models["model.package.model1"]
270+
model2 = manifest_loader.models["model.package.model2"]
271+
source1 = manifest_loader.sources["source.package.my_source.table1"]
272+
source2 = manifest_loader.sources["source.package.my_source.table2"]
273+
snapshot1 = manifest_loader.snapshots["snapshot.package.snapshot1"]
274+
snapshot2 = manifest_loader.snapshots["snapshot.package.snapshot2"]
275275

276276
assert model_class_rule_with_filter not in evaluation.results[model1]
277277
assert isinstance(
@@ -318,9 +318,9 @@ def test_evaluation_with_models_and_sources(
318318
)
319319
evaluation.evaluate()
320320

321-
model1 = manifest_loader.models[0]
322-
source1 = manifest_loader.sources[0]
323-
snapshot1 = manifest_loader.snapshots[0]
321+
model1 = manifest_loader.models["model.package.model1"]
322+
source1 = manifest_loader.sources["source.package.my_source.table1"]
323+
snapshot1 = manifest_loader.snapshots["snapshot.package.snapshot1"]
324324

325325
assert decorator_rule in evaluation.results[model1]
326326
assert decorator_rule_source not in evaluation.results[model1]

tests/test_models.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ def test_manifest_load(mock_read_text, raw_manifest):
1919
and node["package_name"] == raw_manifest["metadata"]["project_name"]
2020
]
2121
)
22-
assert loader.models[0].tests[0].name == "test2"
23-
assert loader.models[0].tests[1].name == "test4"
24-
assert loader.models[0].columns[0].tests[0].name == "test1"
22+
assert loader.models["model.package.model1"].tests[0].name == "test2"
23+
assert loader.models["model.package.model1"].tests[1].name == "test4"
24+
assert loader.models["model.package.model1"].columns[0].tests[0].name == "test1"
2525

2626
assert len(loader.sources) == len(
2727
[
@@ -30,7 +30,23 @@ def test_manifest_load(mock_read_text, raw_manifest):
3030
if source["package_name"] == raw_manifest["metadata"]["project_name"]
3131
]
3232
)
33-
assert loader.sources[0].tests[0].name == "source_test1"
33+
assert (
34+
loader.sources["source.package.my_source.table1"].tests[0].name
35+
== "source_test1"
36+
)
37+
38+
assert loader.snapshots["snapshot.package.snapshot1"].parents == [
39+
loader.models["model.package.model1"]
40+
]
41+
assert loader.models["model.package.model1"].parents == [
42+
loader.models["model.package.model2"],
43+
loader.sources["source.package.my_source.table1"],
44+
loader.snapshots["snapshot.package.snapshot2"],
45+
]
46+
assert loader.models["model.package.model2"].parents == []
47+
assert loader.snapshots["snapshot.package.snapshot2"].parents == [
48+
loader.sources["source.package.my_source.table1"]
49+
]
3450

3551

3652
@patch("dbt_score.models.Path.read_text")
@@ -39,7 +55,7 @@ def test_manifest_select_models_simple(mock_read_text, raw_manifest):
3955
with patch("dbt_score.models.json.loads", return_value=raw_manifest):
4056
manifest_loader = ManifestLoader(Path("some.json"), select=["model1"])
4157

42-
assert [x.name for x in manifest_loader.models] == ["model1"]
58+
assert [x.name for x in manifest_loader.models.values()] == ["model1"]
4359

4460

4561
@patch("dbt_score.models.Path.read_text")
@@ -50,7 +66,7 @@ def test_manifest_select_models_dbt_ls(mock_dbt_ls, mock_read_text, raw_manifest
5066
with patch("dbt_score.models.json.loads", return_value=raw_manifest):
5167
manifest_loader = ManifestLoader(Path("some.json"), select=["+model1"])
5268

53-
assert [x.name for x in manifest_loader.models] == ["model1"]
69+
assert [x.name for x in manifest_loader.models.values()] == ["model1"]
5470
mock_dbt_ls.assert_called_once_with(["+model1"])
5571

5672

0 commit comments

Comments
 (0)