Skip to content

Commit 28b6441

Browse files
authored
Add generic rule: models should implement uniqueness test for their PK (#90)
For models materialized as `table` or `incremental`: loop over their columns to extract the PK, then loop over their tests to find a uniqueness test matching the PK columns. Both table-level and column-level constraints and tests are considered.
1 parent 9304da2 commit 28b6441

File tree

7 files changed

+91
-2
lines changed

7 files changed

+91
-2
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@ and this project adheres to
1010

1111
- Add debug mode to help writing new rules. (#91)
1212
- Fix tests without metadata. (#88)
13+
- Add new rule to enforce presence of uniqueness test. (#90)
14+
- Add new rule to enforce single-column PK to be defined at column level. (#90)
15+
- Add new rule to enforce 1 column uniqueness test to be defined at column
16+
level. (#90)
17+
- Add `constraints` to the model schema. (#90)
1318

1419
## [0.9.0] - 2024-12-19
1520

docs/rules/filters.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Rule filters
2+
3+
::: dbt_score.rules.filters

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ nav:
2828
- Programmatic invocations: programmatic_invocations.md
2929
- Rules:
3030
- rules/generic.md
31+
- rules/filters.md
3132
- Reference:
3233
- reference/cli.md
3334
- reference/config.md

src/dbt_score/models.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,20 @@
1515

1616
@dataclass
1717
class Constraint:
18-
"""Constraint for a column.
18+
"""Constraint for a model or a column.
1919
2020
Attributes:
2121
type: The type of the constraint, e.g. `foreign_key`.
2222
name: The name of the constraint.
2323
expression: The expression of the constraint, e.g. `schema.other_table`.
24+
columns: The columns for the constraint (only for model-level constraints).
2425
_raw_values: The raw values of the constraint in the manifest.
2526
"""
2627

2728
type: str
2829
name: str | None = None
2930
expression: str | None = None
31+
columns: list[str] | None = None
3032
_raw_values: dict[str, Any] = field(default_factory=dict)
3133

3234
@classmethod
@@ -36,6 +38,7 @@ def from_raw_values(cls, raw_values: dict[str, Any]) -> "Constraint":
3638
type=raw_values["type"],
3739
name=raw_values["name"],
3840
expression=raw_values["expression"],
41+
columns=raw_values.get("columns"),
3942
_raw_values=raw_values,
4043
)
4144

@@ -198,6 +201,7 @@ class Model(HasColumnsMixin):
198201
tags: list[str] = field(default_factory=list)
199202
tests: list[Test] = field(default_factory=list)
200203
depends_on: dict[str, list[str]] = field(default_factory=dict)
204+
constraints: list[Constraint] = field(default_factory=list)
201205
_raw_values: dict[str, Any] = field(default_factory=dict)
202206
_raw_test_values: list[dict[str, Any]] = field(default_factory=list)
203207

@@ -232,6 +236,10 @@ def from_node(
232236
.get("column_name")
233237
],
234238
depends_on=node_values["depends_on"],
239+
constraints=[
240+
Constraint.from_raw_values(constraint)
241+
for constraint in node_values["constraints"]
242+
],
235243
_raw_values=node_values,
236244
_raw_test_values=test_values,
237245
)

src/dbt_score/rules/filters.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Rule filters."""
2+
3+
from dbt_score import rule_filter
4+
from dbt_score.models import Model
5+
6+
7+
@rule_filter
def is_table(model: Model) -> bool:
    """Models that are tables."""
    # NOTE(review): the docstring is left untouched because it may be read at
    # runtime as the filter's description -- confirm against `rule_filter`.
    #
    # A model counts as a "table" when its `materialized` config value is
    # either `table` or `incremental`; a missing value yields False.
    materialization = model.config.get("materialized")
    table_like = {"incremental", "table"}
    return materialization in table_like

src/dbt_score/rules/generic.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""All generic rules."""
22

33
from dbt_score import Model, RuleViolation, Severity, rule
4+
from dbt_score.rules.filters import is_table
45

56

67
@rule
@@ -47,7 +48,65 @@ def sql_has_reasonable_number_of_lines(
4748
def has_example_sql(model: Model) -> RuleViolation | None:
4849
"""The documentation of a model should have an example query."""
4950
if model.language == "sql":
50-
if "```sql" not in model.description:
51+
if "```sql" not in (model.description or ""):
5152
return RuleViolation(
5253
"The model description does not include an example SQL query."
5354
)
55+
56+
57+
@rule(rule_filters={is_table()})
def single_pk_defined_at_column_level(model: Model) -> RuleViolation | None:
    """Single-column PK must be defined as a column constraint."""
    # NOTE(review): the docstring is left untouched because it may be read at
    # runtime as the rule description -- confirm against `rule`.
    #
    # Scan the model-level constraints and report the first primary key that
    # lists exactly one column. Returns None when no such constraint exists.
    for model_constraint in model.constraints:
        if model_constraint.type != "primary_key":
            continue
        pk_columns = model_constraint.columns
        if pk_columns is None or len(pk_columns) != 1:
            continue
        return RuleViolation(
            f"Single-column PK {pk_columns[0]} must be defined as a "
            f"column constraint."
        )
    return None
67+
68+
69+
@rule(rule_filters={is_table()})
def single_column_uniqueness_at_column_level(model: Model) -> RuleViolation | None:
    """Single-column uniqueness test must be defined as a column test."""
    # NOTE(review): the docstring is left untouched because it may be read at
    # runtime as the rule description -- confirm against `rule`.
    #
    # A violation is reported as soon as one model-level
    # `unique_combination_of_columns` test lists exactly one column.
    has_single_column_combination = any(
        model_test.type == "unique_combination_of_columns"
        and len(model_test.kwargs.get("combination_of_columns", [])) == 1
        for model_test in model.tests
    )
    if not has_single_column_combination:
        return None
    return RuleViolation(
        "Single-column uniqueness test must be defined as a column test."
    )
78+
79+
80+
@rule(rule_filters={is_table()})
def has_uniqueness_test(model: Model) -> RuleViolation | None:
    """Model has uniqueness test for primary key."""
    # ruff: noqa: C901 [too-complex]
    #
    # NOTE(review): the docstring is left untouched because it may be read at
    # runtime as the rule description -- confirm against `rule`.
    #
    # Returns None when the model has no primary key or when a matching
    # uniqueness test exists; otherwise returns a RuleViolation naming the
    # primary key column(s) lacking a test.

    # Single-column PK: the first column carrying a `primary_key` constraint
    # decides the outcome, based on whether it also has a `unique` test.
    for column in model.columns:
        is_pk = any(
            column_constraint.type == "primary_key"
            for column_constraint in column.constraints
        )
        if not is_pk:
            continue
        if any(data_test.type == "unique" for data_test in column.tests):
            return None
        return RuleViolation(
            f"No unique constraint defined on PK column {column.name}."
        )

    # Composite PK: only the first model-level `primary_key` constraint is
    # considered; a constraint without columns is treated as "no PK".
    pk_columns: list[str] = next(
        (
            model_constraint.columns or []
            for model_constraint in model.constraints
            if model_constraint.type == "primary_key"
        ),
        [],
    )

    if not pk_columns:  # No PK, no need for uniqueness test
        return None

    expected_columns = set(pk_columns)
    for data_test in model.tests:
        if data_test.type != "unique_combination_of_columns":
            continue
        # A test declared without `combination_of_columns` (or with a null
        # value) cannot match the PK; treat it as empty rather than letting
        # `set(None)` raise a TypeError.
        tested_columns = data_test.kwargs.get("combination_of_columns") or []
        if set(tested_columns) == expected_columns:
            return None

    return RuleViolation(
        f"No uniqueness test defined and matching PK {','.join(pk_columns)}."
    )

tests/resources/manifest.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
"tags": []
2727
}
2828
},
29+
"constraints": [],
2930
"package_name": "package",
3031
"database": "db",
3132
"schema": "schema",
@@ -56,6 +57,7 @@
5657
"tags": []
5758
}
5859
},
60+
"constraints": [],
5961
"package_name": "package",
6062
"database": "db",
6163
"schema": "schema",
@@ -86,6 +88,7 @@
8688
"tags": []
8789
}
8890
},
91+
"constraints": [],
8992
"package_name": "package2",
9093
"database": "db",
9194
"schema": "schema",

0 commit comments

Comments
 (0)