From 769dc073ca42fac5432ec283deac1e8e2ca007f4 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 9 Oct 2024 23:55:38 -0400 Subject: [PATCH 01/41] add sources to manifest.json fixture --- tests/resources/manifest.json | 154 ++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) diff --git a/tests/resources/manifest.json b/tests/resources/manifest.json index 52a90ab..18c5429 100644 --- a/tests/resources/manifest.json +++ b/tests/resources/manifest.json @@ -125,5 +125,159 @@ "resource_type": "test", "package_name": "package" } + }, + "sources": { + "source.package.my_source.table1": { + "database": "source_db", + "schema": "source_schema", + "name": "table1", + "resource_type": "source", + "package_name": "package", + "path": "models/sources/sources.yml", + "original_file_path": "models/sources/sources.yml", + "unique_id": "source.package.my_source.table1", + "fqn": [ + "package", + "my_source", + "table1" + ], + "source_name": "my_source", + "source_description": "An important source table.", + "loader": "Fivetran", + "identifier": "table1", + "quoting": {}, + "loaded_at_field": null, + "freshness": { + "warn_after": { + "count": null, + "period": null + }, + "error_after": { + "count": null, + "period": null + }, + "filter": null + }, + "external": null, + "description": "", + "columns": {}, + "meta": {}, + "source_meta": {}, + "tags": [], + "config": { + "enabled": true + }, + "patch_path": null, + "unrendered_config": {}, + "relation_name": "\"package\".\"my_source\".\"table1\"", + "created_at": 1728529440.4206917 + }, + "source.package.my_source.table2": { + "database": "source_db", + "schema": "source_schema", + "name": "table2", + "resource_type": "source", + "package_name": "package", + "path": "models/sources/sources.yml", + "original_file_path": "models/sources/sources.yml", + "unique_id": "source.package.my_source.table2", + "fqn": [ + "package", + "my_source", + "table2" + ], + "source_name": "my_source", + "source_description": "Another table with some columns declared.", + "loader": "Fivetran", + "identifier": "table2", + "quoting": {}, + "loaded_at_field": null, + "freshness": { + "warn_after": { + "count": null, + "period": null + }, + "error_after": { + "count": null, + "period": null + }, + "filter": null + }, + "external": null, + "description": "", + "columns": { + "a": { + "name": "column_a", + "description": "Column A.", + "data_type": "string", + "meta": {}, + "constraints": [], + "tags": [] + }, + "b": { + "name": "column_b", + "description": "Column B.", + "data_type": "integer", + "meta": {}, + "constraints": [], + "tags": [] + } + }, + "meta": {}, + "source_meta": {}, + "tags": [], + "config": { + "enabled": true + }, + "patch_path": null, + "unrendered_config": {}, + "relation_name": "\"package\".\"my_source\".\"table2\"", + "created_at": 1728529440.4206917 + }, + "source.package.my_other_source.table1": { + "database": "source_db", + "schema": "alternate_schema", + "name": "table1", + "resource_type": "source", + "package_name": "package", + "path": "models/sources/sources.yml", + "original_file_path": "models/sources/sources.yml", + "unique_id": "source.package.my_other_source.table1", + "fqn": [ + "package", + "my_other_source", + "table1" + ], + "source_name": "my_other_source", + "source_description": "A source in a different schema.", + "loader": "Fivetran", + "identifier": "table1", + "quoting": {}, + "loaded_at_field": null, + "freshness": { + "warn_after": { + "count": null, + "period": null + }, + "error_after": { + "count": null, + 
"period": null + }, + "filter": null + }, + "external": null, + "description": "", + "columns": {}, + "meta": {}, + "source_meta": {}, + "tags": [], + "config": { + "enabled": true + }, + "patch_path": null, + "unrendered_config": {}, + "relation_name": "\"package\".\"my_other_source\".\"table1\"", + "created_at": 1728529440.4206917 + } } } From 40d3e2edfe43c2267c3c6d0737c5ed847561750a Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 9 Oct 2024 23:58:24 -0400 Subject: [PATCH 02/41] add minimal test assertion for parsing sources --- tests/test_models.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_models.py b/tests/test_models.py index 4300de8..ba94529 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -22,6 +22,14 @@ def test_manifest_load(mock_read_text, raw_manifest): assert loader.models[0].tests[0].name == "test2" assert loader.models[0].columns[0].tests[0].name == "test1" + assert len(loader.sources) == len( + [ + source + for source in raw_manifest["sources"].values() + if source["package_name"] == raw_manifest["metadata"]["project_name"] + ] + ) + @patch("dbt_score.models.Path.read_text") def test_manifest_select_models_simple(mock_read_text, raw_manifest): From 1ee0efc59139cf51fdce3ee79ed7b06b571bba4d Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Thu, 10 Oct 2024 20:44:31 -0400 Subject: [PATCH 03/41] fmt json --- tests/resources/manifest.json | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/resources/manifest.json b/tests/resources/manifest.json index 18c5429..6e5664c 100644 --- a/tests/resources/manifest.json +++ b/tests/resources/manifest.json @@ -136,11 +136,7 @@ "path": "models/sources/sources.yml", "original_file_path": "models/sources/sources.yml", "unique_id": "source.package.my_source.table1", - "fqn": [ - "package", - "my_source", - "table1" - ], + "fqn": ["package", "my_source", "table1"], "source_name": "my_source", "source_description": "An important source table.", "loader": "Fivetran", @@ -181,11 +177,7 @@ "path": "models/sources/sources.yml", "original_file_path": "models/sources/sources.yml", "unique_id": "source.package.my_source.table2", - "fqn": [ - "package", - "my_source", - "table2" - ], + "fqn": ["package", "my_source", "table2"], "source_name": "my_source", "source_description": "Another table with some columns declared.", "loader": "Fivetran", @@ -243,11 +235,7 @@ "path": "models/sources/sources.yml", "original_file_path": "models/sources/sources.yml", "unique_id": "source.package.my_other_source.table1", - "fqn": [ - "package", - "my_other_source", - "table1" - ], + "fqn": ["package", "my_other_source", "table1"], "source_name": "my_other_source", "source_description": "A source in a different schema.", "loader": "Fivetran", From e33d8b8f3c1ac7dd6f03bfffd972603253b58788 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 14 Oct 2024 21:57:55 -0400 Subject: [PATCH 04/41] add Source model and parse into ManifestLoader --- src/dbt_score/__init__.py | 3 +- src/dbt_score/models.py | 198 ++++++++++++++++++++++++++++++++------ 2 files changed, 173 insertions(+), 28 deletions(-) diff --git a/src/dbt_score/__init__.py b/src/dbt_score/__init__.py index 3f4d3b2..5dcd389 100644 --- a/src/dbt_score/__init__.py +++ b/src/dbt_score/__init__.py @@ -1,11 +1,12 @@ """Init dbt_score package.""" from dbt_score.model_filter import ModelFilter, model_filter -from dbt_score.models import Model +from dbt_score.models import Model, Source from dbt_score.rule import Rule, RuleViolation, 
Severity, rule __all__ = [ "Model", + "Source", "ModelFilter", "Rule", "RuleViolation", diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 910068c..c03815c 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -6,7 +6,7 @@ from collections import defaultdict from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Iterable +from typing import Any, Iterable, Literal, TypeAlias from dbt_score.dbt_utils import dbt_ls @@ -117,8 +117,39 @@ def from_node_values( ) +class HasColumnsMixin: + """Common methods for resource types that have columns.""" + + columns: list[Column] + + def get_column(self, column_name: str) -> Column | None: + """Get a column by name.""" + for column in self.columns: + if column.name == column_name: + return column + + return None + + @staticmethod + def _get_columns( + node_values: dict[str, Any], test_values: list[dict[str, Any]] + ) -> list[Column]: + """Get columns from a node and its tests in the manifest.""" + return [ + Column.from_node_values( + values, + [ + test + for test in test_values + if test["test_metadata"]["kwargs"].get("column_name") == name + ], + ) + for name, values in node_values.get("columns", {}).items() + ] + + @dataclass -class Model: +class Model(HasColumnsMixin): """Represents a dbt model. Attributes: @@ -167,31 +198,6 @@ class Model: _raw_values: dict[str, Any] = field(default_factory=dict) _raw_test_values: list[dict[str, Any]] = field(default_factory=list) - def get_column(self, column_name: str) -> Column | None: - """Get a column by name.""" - for column in self.columns: - if column.name == column_name: - return column - - return None - - @staticmethod - def _get_columns( - node_values: dict[str, Any], test_values: list[dict[str, Any]] - ) -> list[Column]: - """Get columns from a node and its tests in the manifest.""" - return [ - Column.from_node_values( - values, - [ - test - for test in test_values - if test["test_metadata"]["kwargs"].get("column_name") == name - ], - ) - for name, values in node_values.get("columns", {}).items() - ] - @classmethod def from_node( cls, node_values: dict[str, Any], test_values: list[dict[str, Any]] @@ -230,6 +236,129 @@ def __hash__(self) -> int: return hash(self.unique_id) +@dataclass +class Duration: + """Represents a duration used in SourceFreshness. + + This is referred to as `Time` in the dbt JSONSchema. + + Attributes: + count: a positive integer + period: "minute" | "hour" | "day" + """ + + count: int | None = None + period: Literal["minute", "hour", "day"] | None = None + + +@dataclass +class SourceFreshness: + """Represents a source freshness configuration. + + This is referred to as `FreshnessThreshold` in the dbt JSONSchema. + + Attributes: + warn_after: The threshold after which the dbt source freshness check should soft-fail with a warning. + error_after: The threshold after which the dbt source freshness check should fail. + filter: An optional filter to apply to the input data before running source freshness check. + """ + + warn_after: Duration + error_after: Duration + filter: str | None = None + + +@dataclass +class Source(HasColumnsMixin): + """Represents a dbt source table. + + Attributes: + unique_id: The id of the source table, e.g. 'source.package.source_name.source_table_name'. + name: The alias of the source table. + description: The full description of the source table. + source_name: The source namespace. + source_description: The description for the source namespace. 
+ original_file_path: The yml path to the source definition. + config: The config of the source definition. + meta: Any meta-attributes on the source table. + source_meta: Any meta-attribuets on the source namespace. + columns: The list of columns for the source table. + package_name: The dbt package name for the source table. + database: The database name of the source table. + schema: The schema name of the source table. + identifier: The actual source table name, i.e. not an alias. + loader: The tool used to load the source table into the warehouse. + freshness: A set of time thresholds after which data may be considered stale. + patch_path: The yml path of the source definition. + tags: The list of tags attached to the source table. + tests: The list of tests attached to the source table. + _raw_values: The raw values of the source definition in the manifest. + _raw_test_values: The raw test values of the source definition in the manifest. + """ + + unique_id: str + name: str + description: str + source_name: str + source_description: str + original_file_path: str + config: dict[str, Any] + meta: dict[str, Any] + source_meta: dict[str, Any] + columns: list[Column] + package_name: str + database: str + schema: str + identifier: str + loader: str + freshness: SourceFreshness + patch_path: str | None = None + tags: list[str] = field(default_factory=list) + tests: list[Test] = field(default_factory=list) + _raw_values: dict[str, Any] = field(default_factory=dict) + _raw_test_values: list[dict[str, Any]] = field(default_factory=list) + + @classmethod + def from_node( + cls, node_values: dict[str, Any], test_values: list[dict[str, Any]] + ) -> "Source": + """Create a source object from a node and it's tests in the manifest.""" + return cls( + unique_id=node_values["unique_id"], + name=node_values["name"], + description=node_values["description"], + source_name=node_values["source_name"], + source_description=node_values["source_description"], + original_file_path=node_values["original_file_path"], + config=node_values["config"], + meta=node_values["meta"], + source_meta=node_values["source_meta"], + columns=cls._get_columns(node_values, test_values), + package_name=node_values["package_name"], + database=node_values["database"], + schema=node_values["schema"], + identifier=node_values["identifier"], + loader=node_values["loader"], + freshness=node_values["freshness"], + patch_path=node_values["patch_path"], + tags=node_values["tags"], + tests=[ + Test.from_node(test) + for test in test_values + if not test["test_metadata"]["kwargs"].get("column_name") + ], + _raw_values=node_values, + _raw_test_values=test_values, + ) + + def __hash__(self) -> int: + """Compute a unique hash for a source.""" + return hash(self.unique_id) + + +Evaluable: TypeAlias = Model | Source + + class ManifestLoader: """Load the models and tests from the manifest.""" @@ -247,11 +376,19 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None): for node_id, node_values in self.raw_manifest.get("nodes", {}).items() if node_values["package_name"] == self.project_name } + self.raw_sources = { + source_id: source_values + for source_id, source_values in self.raw_manifest.get("sources", {}).items() + if source_values["package_name"] == self.project_name + } + self.models: list[Model] = [] self.tests: dict[str, list[dict[str, Any]]] = defaultdict(list) + self.sources: list[Source] = [] self._reindex_tests() self._load_models() + self._load_sources() if select: self._select_models(select) @@ -266,6 +403,13 @@ 
def _load_models(self) -> None: model = Model.from_node(node_values, self.tests.get(node_id, [])) self.models.append(model) + def _load_sources(self) -> None: + """Load the sources from the manifest.""" + for source_id, source_values in self.raw_sources.items(): + if source_values.get("resource_type") == "source": + source = Source.from_node(source_values, self.tests.get(source_id, [])) + self.sources.append(source) + def _reindex_tests(self) -> None: """Index tests based on their model id.""" for node_values in self.raw_nodes.values(): From 480b41ee8d53a3eaecb5597732bfdd84c5d2e5d9 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 14 Oct 2024 21:59:52 -0400 Subject: [PATCH 05/41] add fixtures for sources --- tests/conftest.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4704d24..5769f57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Any, Type -from dbt_score import Model, Rule, RuleViolation, Severity, rule +from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule from dbt_score.config import Config from dbt_score.model_filter import ModelFilter, model_filter from dbt_score.models import ManifestLoader @@ -73,6 +73,25 @@ def model2(raw_manifest) -> Model: return Model.from_node(raw_manifest["nodes"]["model.package.model2"], []) +# Sources + + +@fixture +def source1(raw_manifest) -> Model: + """Source 1.""" + return Source.from_node( + raw_manifest["sources"]["source.package.my_source.table1"], [] + ) + + +@fixture +def source2(raw_manifest) -> Model: + """Source 2.""" + return Source.from_node( + raw_manifest["sources"]["source.package.my_source.table2"], [] + ) + + # Multiple ways to create rules @@ -131,6 +150,61 @@ def evaluate(self, model: Model) -> RuleViolation | None: return ExampleRule +@fixture +def decorator_rule_source() -> Type[Rule]: + """An example rule created with the rule decorator.""" + + @rule() + def example_rule_source(source: Source) -> RuleViolation | None: + """Description of the rule.""" + if source.name == "table1": + return RuleViolation(message="Source1 is a violation.") + + return example_rule_source + + +@fixture +def decorator_rule_no_parens_source() -> Type[Rule]: + """An example rule created with the rule decorator without parentheses.""" + + @rule + def example_rule_source(source: Source) -> RuleViolation | None: + """Description of the rule.""" + if source.name == "table1": + return RuleViolation(message="Source1 is a violation.") + + return example_rule_source + + +@fixture +def decorator_rule_args_source() -> Type[Rule]: + """An example rule created with the rule decorator with arguments.""" + + @rule(description="Description of the rule.") + def example_rule_source(source: Source) -> RuleViolation | None: + if source.name == "table1": + return RuleViolation(message="Source1 is a violation.") + + return example_rule_source + + +@fixture +def class_rule_source() -> Type[Rule]: + """An example rule created with a class.""" + + class ExampleRuleSource(Rule): + """Example rule.""" + + description = "Description of the rule." 
+ + def evaluate(self, source: Source) -> RuleViolation | None: + """Evaluate source.""" + if source.name == "table1": + return RuleViolation(message="Source1 is a violation.") + + return ExampleRuleSource + + # Rules From 2fa3d18f56aa267767f9b2dc7951c184e91a58b7 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 14 Oct 2024 22:02:58 -0400 Subject: [PATCH 06/41] infer resource_type from Rule.evaluate type annotation --- src/dbt_score/rule.py | 30 ++++++++++++++++++++++++++---- tests/test_rule.py | 33 ++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 3d68fdc..8daaf9c 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -6,8 +6,10 @@ from enum import Enum from typing import Any, Callable, Iterable, Type, TypeAlias, overload +from more_itertools import first_true + from dbt_score.model_filter import ModelFilter -from dbt_score.models import Model +from dbt_score.models import Evaluable class Severity(Enum): @@ -54,7 +56,7 @@ class RuleViolation: message: str | None = None -RuleEvaluationType: TypeAlias = Callable[[Model], RuleViolation | None] +RuleEvaluationType: TypeAlias = Callable[[Evaluable], RuleViolation | None] class Rule: @@ -65,6 +67,7 @@ class Rule: model_filter_names: list[str] model_filters: frozenset[ModelFilter] = frozenset() default_config: typing.ClassVar[dict[str, Any]] = {} + resource_type: typing.ClassVar[Evaluable] def __init__(self, rule_config: RuleConfig | None = None) -> None: """Initialize the rule.""" @@ -78,6 +81,25 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore if not hasattr(cls, "description"): raise AttributeError("Subclass must define class attribute `description`.") + cls.resource_type = cls._introspect_resource_type() + + @classmethod + def _introspect_resource_type(cls) -> Type[Evaluable]: + evaluate_func = getattr(cls, "_orig_evaluate", cls.evaluate) + + sig = inspect.signature(evaluate_func) + resource_type_argument = first_true( + sig.parameters.values(), + pred=lambda arg: arg.annotation in typing.get_args(Evaluable), + ) + + if not resource_type_argument: + raise TypeError( + "Subclass must implement method `evaluate` with an annotated Model or Source argument." 
+ ) + + return resource_type_argument.annotation + def process_config(self, rule_config: RuleConfig) -> None: """Process the rule config.""" config = self.default_config.copy() @@ -97,12 +119,12 @@ def process_config(self, rule_config: RuleConfig) -> None: self.model_filter_names = rule_config.model_filter_names self.config = config - def evaluate(self, model: Model) -> RuleViolation | None: + def evaluate(self, model: Evaluable) -> RuleViolation | None: """Evaluates the rule.""" raise NotImplementedError("Subclass must implement method `evaluate`.") @classmethod - def should_evaluate(cls, model: Model) -> bool: + def should_evaluate(cls, model: Evaluable) -> bool: """Checks if all filters in the rule allow evaluation.""" if cls.model_filters: return all(f.evaluate(model) for f in cls.model_filters) diff --git a/tests/test_rule.py b/tests/test_rule.py index 1ed3077..467deee 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -1,7 +1,7 @@ """Test rule.""" import pytest -from dbt_score import Model, Rule, RuleViolation, Severity, rule +from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule def test_rule_decorator_and_class( @@ -56,13 +56,28 @@ def evaluate(self, model: Model) -> RuleViolation | None: def test_missing_evaluate_rule_class(model1): """Test missing evaluate implementation in rule class.""" + with pytest.raises(TypeError): - class BadRule(Rule): - """Bad example rule.""" - - description = "Description of the rule." - - rule = BadRule() + class BadRule(Rule): + """Bad example rule.""" - with pytest.raises(NotImplementedError): - rule.evaluate(model1) + description = "Description of the rule." + + +@pytest.mark.parametrize( + "rule_fixture,expected_type", + [ + ("decorator_rule", Model), + ("decorator_rule_no_parens", Model), + ("decorator_rule_args", Model), + ("class_rule", Model), + ("decorator_rule_source", Source), + ("decorator_rule_no_parens_source", Source), + ("decorator_rule_args_source", Source), + ("class_rule_source", Source), + ], +) +def test_rule_introspects_its_resource_type(request, rule_fixture, expected_type): + """Test that each rule is aware of the resource-type that it can be evaluated against.""" + rule = request.getfixturevalue(rule_fixture) + assert rule().resource_type is expected_type From 141aa3cca60b0024e0f60d388e189e874271e699 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 14 Oct 2024 22:04:52 -0400 Subject: [PATCH 07/41] evaluate rules for both sources and models --- src/dbt_score/evaluation.py | 14 +++++++++----- tests/test_evaluation.py | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/dbt_score/evaluation.py b/src/dbt_score/evaluation.py index c583d06..d4adf15 100644 --- a/src/dbt_score/evaluation.py +++ b/src/dbt_score/evaluation.py @@ -2,10 +2,11 @@ from __future__ import annotations +from itertools import chain from typing import Type from dbt_score.formatters import Formatter -from dbt_score.models import ManifestLoader, Model +from dbt_score.models import Evaluable, ManifestLoader from dbt_score.rule import Rule, RuleViolation from dbt_score.rule_registry import RuleRegistry from dbt_score.scoring import Score, Scorer @@ -15,6 +16,7 @@ # - A RuleViolation if a linting error was found # - An Exception if the rule failed to run ModelResultsType = dict[Type[Rule], None | RuleViolation | Exception] +EvaluableResultsType = dict[Type[Rule], None | RuleViolation | Exception] class Evaluation: @@ -41,10 +43,10 @@ def __init__( self._scorer = scorer # For 
each model, its results - self.results: dict[Model, ModelResultsType] = {} + self.results: dict[Evaluable, EvaluableResultsType] = {} # For each model, its computed score - self.scores: dict[Model, Score] = {} + self.scores: dict[Evaluable, Score] = {} # The aggregated project score self.project_score: Score @@ -53,11 +55,13 @@ def evaluate(self) -> None: """Evaluate all rules.""" rules = self._rule_registry.rules.values() - for model in self._manifest_loader.models: + for model in chain(self._manifest_loader.models, self._manifest_loader.sources): self.results[model] = {} for rule in rules: try: - if rule.should_evaluate(model): # Consider model filter(s). + if rule.resource_type is type(model) and rule.should_evaluate( + model + ): # Consider model filter(s). result = rule.evaluate(model, **rule.config) self.results[model][rule.__class__] = result except Exception as e: diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 569124b..ab0f718 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -53,10 +53,10 @@ def test_evaluation_low_medium_high( assert isinstance(evaluation.results[model2][rule_severity_high], RuleViolation) assert isinstance(evaluation.results[model2][rule_error], Exception) - assert mock_formatter.model_evaluated.call_count == 2 + assert mock_formatter.model_evaluated.call_count == 5 assert mock_formatter.project_evaluated.call_count == 1 - assert mock_scorer.score_model.call_count == 2 + assert mock_scorer.score_model.call_count == 5 assert mock_scorer.score_aggregate_models.call_count == 1 @@ -235,3 +235,36 @@ def test_evaluation_with_class_filter( assert class_rule_with_filter not in evaluation.results[model1] assert isinstance(evaluation.results[model2][class_rule_with_filter], RuleViolation) + + +def test_evaluation_with_models_and_sources( + manifest_path, default_config, decorator_rule, decorator_rule_source +): + """Test that model rules are applied to models and source rules are applied to sources.""" + manifest_loader = ManifestLoader(manifest_path) + mock_formatter = Mock() + mock_scorer = Mock() + + rule_registry = RuleRegistry(default_config) + rule_registry._add_rule(decorator_rule) + rule_registry._add_rule(decorator_rule_source) + + # Ensure we get a valid Score object from the Mock + mock_scorer.score_model.return_value = Score(10, "🥇") + + evaluation = Evaluation( + rule_registry=rule_registry, + manifest_loader=manifest_loader, + formatter=mock_formatter, + scorer=mock_scorer, + ) + evaluation.evaluate() + + model1 = manifest_loader.models[0] + source1 = manifest_loader.sources[0] + + assert decorator_rule in evaluation.results[model1] + assert decorator_rule_source not in evaluation.results[model1] + + assert decorator_rule_source in evaluation.results[source1] + assert decorator_rule not in evaluation.results[source1] From 17ef4be54dd939374105d6c22c231af1ef59d089 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 16 Oct 2024 23:35:23 -0400 Subject: [PATCH 08/41] allow sources to be filtered --- src/dbt_score/model_filter.py | 31 +++++++++++++++++++--- tests/conftest.py | 50 ++++++++++++++++++++++++++++++----- tests/test_evaluation.py | 39 +++++++++++++++++++++------ tests/test_model_filter.py | 31 +++++++++++++++++++++- 4 files changed, 132 insertions(+), 19 deletions(-) diff --git a/src/dbt_score/model_filter.py b/src/dbt_score/model_filter.py index 102a44c..330f81e 100644 --- a/src/dbt_score/model_filter.py +++ b/src/dbt_score/model_filter.py @@ -1,16 +1,20 @@ """Model filtering to choose when to apply 
specific rules.""" - +import inspect +import typing from typing import Any, Callable, Type, TypeAlias, overload -from dbt_score.models import Model +from more_itertools import first_true + +from dbt_score.models import Evaluable -FilterEvaluationType: TypeAlias = Callable[[Model], bool] +FilterEvaluationType: TypeAlias = Callable[[Evaluable], bool] class ModelFilter: """The Filter base class.""" description: str + resource_type: typing.ClassVar[Evaluable] def __init__(self) -> None: """Initialize the filter.""" @@ -22,7 +26,26 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore if not hasattr(cls, "description"): raise AttributeError("Subclass must define class attribute `description`.") - def evaluate(self, model: Model) -> bool: + cls.resource_type = cls._introspect_resource_type() + + @classmethod + def _introspect_resource_type(cls) -> Type[Evaluable]: + evaluate_func = getattr(cls, "_orig_evaluate", cls.evaluate) + + sig = inspect.signature(evaluate_func) + resource_type_argument = first_true( + sig.parameters.values(), + pred=lambda arg: arg.annotation in typing.get_args(Evaluable), + ) + + if not resource_type_argument: + raise TypeError( + "Subclass must implement method `evaluate` with an annotated Model or Source argument." + ) + + return resource_type_argument.annotation + + def evaluate(self, model: Evaluable) -> bool: """Evaluates the filter.""" raise NotImplementedError("Subclass must implement method `evaluate`.") diff --git a/tests/conftest.py b/tests/conftest.py index 5769f57..870e1df 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -288,7 +288,7 @@ def rule_error(model: Model) -> RuleViolation | None: @fixture -def rule_with_filter() -> Type[Rule]: +def model_rule_with_filter() -> Type[Rule]: """An example rule that skips through a filter.""" @model_filter @@ -297,15 +297,32 @@ def skip_model1(model: Model) -> bool: return model.name != "model1" @rule(model_filters={skip_model1()}) - def rule_with_filter(model: Model) -> RuleViolation | None: + def model_rule_with_filter(model: Model) -> RuleViolation | None: """Rule that always fails when not filtered.""" return RuleViolation(message="I always fail.") - return rule_with_filter + return model_rule_with_filter @fixture -def class_rule_with_filter() -> Type[Rule]: +def source_rule_with_filter() -> Type[Rule]: + """An example rule that skips through a filter.""" + + @model_filter + def skip_source1(source: Source) -> bool: + """Skips for source1, passes for source2.""" + return source.name != "table1" + + @rule(model_filters={skip_source1()}) + def source_rule_with_filter(source: Source) -> RuleViolation | None: + """Rule that always fails when not filtered.""" + return RuleViolation(message="I always fail.") + + return source_rule_with_filter + + +@fixture +def model_class_rule_with_filter() -> Type[Rule]: """Using class definitions for filters and rules.""" class SkipModel1(ModelFilter): @@ -315,11 +332,32 @@ def evaluate(self, model: Model) -> bool: """Skips for model1, passes for model2.""" return model.name != "model1" - class RuleWithFilter(Rule): + class ModelRuleWithFilter(Rule): description = "Filter defined by a class." 
model_filters = frozenset({SkipModel1()}) def evaluate(self, model: Model) -> RuleViolation | None: return RuleViolation(message="I always fail.") - return RuleWithFilter + return ModelRuleWithFilter + + +@fixture +def source_class_rule_with_filter() -> Type[Rule]: + """Using class definitions for filters and rules.""" + + class SkipSource1(ModelFilter): + description = "Filter defined by a class." + + def evaluate(self, source: Source) -> bool: + """Skips for source1, passes for source2.""" + return source.name != "table1" + + class SourceRuleWithFilter(Rule): + description = "Filter defined by a class." + model_filters = frozenset({SkipSource1()}) + + def evaluate(self, source: Source) -> RuleViolation | None: + return RuleViolation(message="I always fail.") + + return SourceRuleWithFilter diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index ab0f718..f7349fd 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -181,14 +181,17 @@ def test_evaluation_rule_with_config( assert evaluation.results[model2][rule_with_config] is None -def test_evaluation_with_filter(manifest_path, default_config, rule_with_filter): +def test_evaluation_with_filter( + manifest_path, default_config, model_rule_with_filter, source_rule_with_filter +): """Test rule with filter.""" manifest_loader = ManifestLoader(manifest_path) mock_formatter = Mock() mock_scorer = Mock() rule_registry = RuleRegistry(default_config) - rule_registry._add_rule(rule_with_filter) + rule_registry._add_rule(model_rule_with_filter) + rule_registry._add_rule(source_rule_with_filter) # Ensure we get a valid Score object from the Mock mock_scorer.score_model.return_value = Score(10, "🥇") @@ -203,13 +206,23 @@ def test_evaluation_with_filter(manifest_path, default_config, rule_with_filter) model1 = manifest_loader.models[0] model2 = manifest_loader.models[1] + source1 = manifest_loader.sources[0] + source2 = manifest_loader.sources[1] - assert rule_with_filter not in evaluation.results[model1] - assert isinstance(evaluation.results[model2][rule_with_filter], RuleViolation) + assert model_rule_with_filter not in evaluation.results[model1] + assert isinstance(evaluation.results[model2][model_rule_with_filter], RuleViolation) + + assert source_rule_with_filter not in evaluation.results[source1] + assert isinstance( + evaluation.results[source2][source_rule_with_filter], RuleViolation + ) def test_evaluation_with_class_filter( - manifest_path, default_config, class_rule_with_filter + manifest_path, + default_config, + model_class_rule_with_filter, + source_class_rule_with_filter, ): """Test rule with filters and filtered rules defined by classes.""" manifest_loader = ManifestLoader(manifest_path) @@ -217,7 +230,8 @@ def test_evaluation_with_class_filter( mock_scorer = Mock() rule_registry = RuleRegistry(default_config) - rule_registry._add_rule(class_rule_with_filter) + rule_registry._add_rule(model_class_rule_with_filter) + rule_registry._add_rule(source_class_rule_with_filter) # Ensure we get a valid Score object from the Mock mock_scorer.score_model.return_value = Score(10, "🥇") @@ -232,9 +246,18 @@ def test_evaluation_with_class_filter( model1 = manifest_loader.models[0] model2 = manifest_loader.models[1] + source1 = manifest_loader.sources[0] + source2 = manifest_loader.sources[1] - assert class_rule_with_filter not in evaluation.results[model1] - assert isinstance(evaluation.results[model2][class_rule_with_filter], RuleViolation) + assert model_class_rule_with_filter not in evaluation.results[model1] + 
assert isinstance( + evaluation.results[model2][model_class_rule_with_filter], RuleViolation + ) + + assert source_class_rule_with_filter not in evaluation.results[source1] + assert isinstance( + evaluation.results[source2][source_class_rule_with_filter], RuleViolation + ) def test_evaluation_with_models_and_sources( diff --git a/tests/test_model_filter.py b/tests/test_model_filter.py index 86f1bf1..23ecf97 100644 --- a/tests/test_model_filter.py +++ b/tests/test_model_filter.py @@ -1,7 +1,7 @@ """Test model filters.""" from dbt_score.model_filter import ModelFilter, model_filter -from dbt_score.models import Model +from dbt_score.models import Model, Source def test_basic_filter(model1, model2): @@ -18,6 +18,20 @@ def only_model1(model: Model) -> bool: assert not instance.evaluate(model2) +def test_basic_filter_with_sources(source1, source2): + """Test basic filter testing for a specific model.""" + + @model_filter + def only_source1(source: Source) -> bool: + """Some description.""" + return source.name == "table1" + + instance = only_source1() # since the decorator returns a Type + assert instance.description == "Some description." + assert instance.evaluate(source1) + assert not instance.evaluate(source2) + + def test_class_filter(model1, model2): """Test basic filter using class.""" @@ -31,3 +45,18 @@ def evaluate(self, model: Model) -> bool: assert instance.description == "Some description." assert instance.evaluate(model1) assert not instance.evaluate(model2) + + +def test_class_filter_with_sources(source1, source2): + """Test basic filter using class.""" + + class OnlySource1(ModelFilter): + description = "Some description." + + def evaluate(self, source: Source) -> bool: + return source.name == "table1" + + instance = OnlySource1() + assert instance.description == "Some description." + assert instance.evaluate(source1) + assert not instance.evaluate(source2) From a85ad3abb0ce5b7f5f1cc3c8e4e3dd331e73f85e Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 16 Oct 2024 23:45:29 -0400 Subject: [PATCH 09/41] coverage --- tests/test_model_filter.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/test_model_filter.py b/tests/test_model_filter.py index 23ecf97..6ba1256 100644 --- a/tests/test_model_filter.py +++ b/tests/test_model_filter.py @@ -1,5 +1,5 @@ """Test model filters.""" - +import pytest from dbt_score.model_filter import ModelFilter, model_filter from dbt_score.models import Model, Source @@ -60,3 +60,34 @@ def evaluate(self, source: Source) -> bool: assert instance.description == "Some description." assert instance.evaluate(source1) assert not instance.evaluate(source2) + + +def test_missing_description_rule_filter(): + """Test missing description in filter decorator.""" + with pytest.raises(AttributeError): + + @model_filter() + def example_filter(model: Model) -> bool: + return True + + +def test_missing_description_rule_class(): + """Test missing description in filter class.""" + with pytest.raises(AttributeError): + + class BadFilter(ModelFilter): + """Bad example filter.""" + + def evaluate(self, model: Model) -> bool: + """Evaluate filter.""" + return True + + +def test_missing_evaluate_rule_class(model1): + """Test missing evaluate implementation in filter class.""" + with pytest.raises(TypeError): + + class BadFilter(ModelFilter): + """Bad example filter.""" + + description = "Description of the rule." 
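With the Source dataclass, the annotation-based resource_type introspection on rules and filters, and source-aware evaluation introduced in the patches above, a user-defined source rule could look like the following minimal sketch. The filter and rule names here are illustrative only and are not part of these patches; the sketch assumes the public imports shown in the conftest changes (rule and RuleViolation from dbt_score, model_filter from dbt_score.model_filter).

    from dbt_score import RuleViolation, Source, rule
    from dbt_score.model_filter import model_filter


    @model_filter
    def fivetran_sources_only(source: Source) -> bool:
        """Only evaluate sources loaded by Fivetran."""
        return source.loader == "Fivetran"


    @rule(model_filters={fivetran_sources_only()})
    def source_has_description(source: Source) -> RuleViolation | None:
        """Source tables should have a description."""
        if not source.description:
            return RuleViolation(
                message=f"{source.source_name}.{source.name} is missing a description."
            )
        return None

Because the evaluate signature is annotated with Source, the rule's resource_type is introspected as Source and it is only run against source tables during evaluation, while the attached filter further restricts it to Fivetran-loaded sources.
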
From 1a50d6b6c82d8b15b3958ca32691b265fb565c02 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 16 Oct 2024 23:50:15 -0400 Subject: [PATCH 10/41] no cover overload --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 1ce548c..3f36430 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,3 +114,6 @@ source = [ [tool.coverage.report] show_missing = true fail_under = 80 +exclude_also = [ + "@overload" +] \ No newline at end of file From 46664196c62009d5d2cecab6a00dfdfef20d0574 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Sat, 19 Oct 2024 18:55:05 -0400 Subject: [PATCH 11/41] replace occurrences of "model" with "evaluable" --- src/dbt_score/cli.py | 12 ++--- src/dbt_score/config.py | 4 +- src/dbt_score/evaluation.py | 35 +++++++------- src/dbt_score/formatters/__init__.py | 10 ++-- src/dbt_score/formatters/ascii_formatter.py | 10 ++-- .../formatters/human_readable_formatter.py | 28 +++++------ src/dbt_score/formatters/json_formatter.py | 20 ++++---- .../formatters/manifest_formatter.py | 20 ++++---- src/dbt_score/scoring.py | 16 +++---- tests/formatters/test_ascii_formatter.py | 6 +-- .../test_human_readable_formatter.py | 20 ++++---- tests/formatters/test_json_formatter.py | 2 +- tests/formatters/test_manifest_formatter.py | 14 +++--- tests/resources/pyproject.toml | 2 +- tests/test_cli.py | 4 +- tests/test_config.py | 2 +- tests/test_evaluation.py | 6 +-- tests/test_scoring.py | 46 +++++++++---------- 18 files changed, 128 insertions(+), 129 deletions(-) diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index 613163e..ac48a42 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -88,8 +88,8 @@ def cli() -> None: default=None, ) @click.option( - "--fail_any_model_under", - help="Fail if any model is under this value.", + "--fail_any_evaluable_under", + help="Fail if any evaluable is under this value.", type=float, is_flag=False, default=None, @@ -104,7 +104,7 @@ def lint( manifest: Path, run_dbt_parse: bool, fail_project_under: float, - fail_any_model_under: float, + fail_any_evaluable_under: float, ) -> None: """Lint dbt models metadata.""" manifest_provided = ( @@ -122,8 +122,8 @@ def lint( config.overload({"disabled_rules": disabled_rule}) if fail_project_under: config.overload({"fail_project_under": fail_project_under}) - if fail_any_model_under: - config.overload({"fail_any_model_under": fail_any_model_under}) + if fail_any_evaluable_under: + config.overload({"fail_any_evaluable_under": fail_any_evaluable_under}) try: if run_dbt_parse: @@ -148,7 +148,7 @@ def lint( ctx.exit(2) if ( - any(x.value < config.fail_any_model_under for x in evaluation.scores.values()) + any(x.value < config.fail_any_evaluable_under for x in evaluation.scores.values()) or evaluation.project_score.value < config.fail_project_under ): ctx.exit(1) diff --git a/src/dbt_score/config.py b/src/dbt_score/config.py index 4a4ddf5..fd46e91 100644 --- a/src/dbt_score/config.py +++ b/src/dbt_score/config.py @@ -56,7 +56,7 @@ class Config: "disabled_rules", "inject_cwd_in_python_path", "fail_project_under", - "fail_any_model_under", + "fail_any_evaluable_under", ] _rules_section: Final[str] = "rules" _badges_section: Final[str] = "badges" @@ -70,7 +70,7 @@ def __init__(self) -> None: self.config_file: Path | None = None self.badge_config: BadgeConfig = BadgeConfig() self.fail_project_under: float = 5.0 - self.fail_any_model_under: float = 5.0 + self.fail_any_evaluable_under: float = 5.0 def set_option(self, option: str, value: Any) -> None: 
"""Set an option in the config.""" diff --git a/src/dbt_score/evaluation.py b/src/dbt_score/evaluation.py index d4adf15..861b616 100644 --- a/src/dbt_score/evaluation.py +++ b/src/dbt_score/evaluation.py @@ -11,11 +11,10 @@ from dbt_score.rule_registry import RuleRegistry from dbt_score.scoring import Score, Scorer -# The results of a given model are stored in a dictionary, mapping rules to either: +# The results of a given evaluable are stored in a dictionary, mapping rules to either: # - None if there was no issue # - A RuleViolation if a linting error was found # - An Exception if the rule failed to run -ModelResultsType = dict[Type[Rule], None | RuleViolation | Exception] EvaluableResultsType = dict[Type[Rule], None | RuleViolation | Exception] @@ -33,7 +32,7 @@ def __init__( Args: rule_registry: A rule registry to access rules. - manifest_loader: A manifest loader to access model metadata. + manifest_loader: A manifest loader to access dbt metadata. formatter: A formatter to display results. scorer: A scorer to compute scores. """ @@ -42,10 +41,10 @@ def __init__( self._formatter = formatter self._scorer = scorer - # For each model, its results + # For each evaluable, its results self.results: dict[Evaluable, EvaluableResultsType] = {} - # For each model, its computed score + # For each evaluable, its computed score self.scores: dict[Evaluable, Score] = {} # The aggregated project score @@ -55,28 +54,28 @@ def evaluate(self) -> None: """Evaluate all rules.""" rules = self._rule_registry.rules.values() - for model in chain(self._manifest_loader.models, self._manifest_loader.sources): - self.results[model] = {} + for evaluable in chain(self._manifest_loader.models, self._manifest_loader.sources): + self.results[evaluable] = {} for rule in rules: try: - if rule.resource_type is type(model) and rule.should_evaluate( - model - ): # Consider model filter(s). - result = rule.evaluate(model, **rule.config) - self.results[model][rule.__class__] = result + if rule.resource_type is type(evaluable) and rule.should_evaluate( + evaluable + ): # Consider rule filter(s). 
+ result = rule.evaluate(evaluable, **rule.config) + self.results[evaluable][rule.__class__] = result except Exception as e: - self.results[model][rule.__class__] = e + self.results[evaluable][rule.__class__] = e - self.scores[model] = self._scorer.score_model(self.results[model]) - self._formatter.model_evaluated( - model, self.results[model], self.scores[model] + self.scores[evaluable] = self._scorer.score_evaluable(self.results[evaluable]) + self._formatter.evaluable_evaluated( + evaluable, self.results[evaluable], self.scores[evaluable] ) # Compute score for project - self.project_score = self._scorer.score_aggregate_models( + self.project_score = self._scorer.score_aggregate_evaluables( list(self.scores.values()) ) # Add null check before calling project_evaluated - if self._manifest_loader.models: + if self._manifest_loader.models or self._manifest_loader.sources: self._formatter.project_evaluated(self.project_score) diff --git a/src/dbt_score/formatters/__init__.py b/src/dbt_score/formatters/__init__.py index ff37429..d9f6d46 100644 --- a/src/dbt_score/formatters/__init__.py +++ b/src/dbt_score/formatters/__init__.py @@ -9,8 +9,8 @@ from dbt_score.scoring import Score if typing.TYPE_CHECKING: - from dbt_score.evaluation import ModelResultsType -from dbt_score.models import ManifestLoader, Model + from dbt_score.evaluation import EvaluableResultsType +from dbt_score.models import ManifestLoader, Model, Evaluable class Formatter(ABC): @@ -22,10 +22,10 @@ def __init__(self, manifest_loader: ManifestLoader, config: Config): self._config = config @abstractmethod - def model_evaluated( - self, model: Model, results: ModelResultsType, score: Score + def evaluable_evaluated( + self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: - """Callback when a model has been evaluated.""" + """Callback when an evaluable item has been evaluated.""" raise NotImplementedError @abstractmethod diff --git a/src/dbt_score/formatters/ascii_formatter.py b/src/dbt_score/formatters/ascii_formatter.py index 4035dd3..61cda3b 100644 --- a/src/dbt_score/formatters/ascii_formatter.py +++ b/src/dbt_score/formatters/ascii_formatter.py @@ -1,9 +1,9 @@ """ASCII formatter.""" -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Model +from dbt_score.models import Evaluable from dbt_score.scoring import Score, Scorer # ruff: noqa: E501 [line-too-long] @@ -66,10 +66,10 @@ class ASCIIFormatter(Formatter): """Formatter for ASCII medals in the terminal.""" - def model_evaluated( - self, model: Model, results: ModelResultsType, score: Score + def evaluable_evaluated( + self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: - """Callback when a model has been evaluated.""" + """Callback when an evaluable item has been evaluated.""" pass def project_evaluated(self, score: Score) -> None: diff --git a/src/dbt_score/formatters/human_readable_formatter.py b/src/dbt_score/formatters/human_readable_formatter.py index ba49a53..58f9b1d 100644 --- a/src/dbt_score/formatters/human_readable_formatter.py +++ b/src/dbt_score/formatters/human_readable_formatter.py @@ -2,9 +2,9 @@ from typing import Any -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Model +from dbt_score.models import Model, Evaluable from dbt_score.rule import 
RuleViolation from dbt_score.scoring import Score @@ -20,20 +20,20 @@ class HumanReadableFormatter(Formatter): def __init__(self, *args: Any, **kwargs: Any): """Instantiate formatter.""" super().__init__(*args, **kwargs) - self._failed_models: list[tuple[Model, Score]] = [] + self._failed_evaluables: list[tuple[Evaluable, Score]] = [] @staticmethod def bold(text: str) -> str: """Return text in bold.""" return f"\033[1m{text}\033[0m" - def model_evaluated( - self, model: Model, results: ModelResultsType, score: Score + def evaluable_evaluated( + self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: - """Callback when a model has been evaluated.""" - if score.value < self._config.fail_any_model_under: - self._failed_models.append((model, score)) - print(f"{score.badge} {self.bold(model.name)} (score: {score.rounded_value!s})") + """Callback when an evaluable item has been evaluated.""" + if score.value < self._config.fail_any_evaluable_under: + self._failed_evaluables.append((evaluable, score)) + print(f"{score.badge} {self.bold(evaluable.name)} (score: {score.rounded_value!s})") for rule, result in results.items(): if result is None: print(f"{self.indent}{self.label_ok} {rule.source()}") @@ -50,14 +50,14 @@ def project_evaluated(self, score: Score) -> None: """Callback when a project has been evaluated.""" print(f"Project score: {self.bold(str(score.rounded_value))} {score.badge}") - if len(self._failed_models) > 0: + if len(self._failed_evaluables) > 0: print() print( - f"Error: model score too low, fail_any_model_under = " - f"{self._config.fail_any_model_under}" + f"Error: evaluable score too low, fail_any_evaluable_under = " + f"{self._config.fail_any_evaluable_under}" ) - for model, model_score in self._failed_models: - print(f"Model {model.name} scored {model_score.value}") + for evaluable, evaluable_score in self._failed_evaluables: + print(f"Model {evaluable.name} scored {evaluable_score.value}") elif score.value < self._config.fail_project_under: print() diff --git a/src/dbt_score/formatters/json_formatter.py b/src/dbt_score/formatters/json_formatter.py index 29f1fde..2648de6 100644 --- a/src/dbt_score/formatters/json_formatter.py +++ b/src/dbt_score/formatters/json_formatter.py @@ -47,9 +47,9 @@ import json from typing import Any -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Model +from dbt_score.models import Evaluable from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -63,32 +63,32 @@ def __init__(self, *args: Any, **kwargs: Any): self._model_results: dict[str, dict[str, Any]] = {} self._project_results: dict[str, Any] - def model_evaluated( - self, model: Model, results: ModelResultsType, score: Score + def evaluable_evaluated( + self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: - """Callback when a model has been evaluated.""" - self._model_results[model.name] = { + """Callback when an evaluable item has been evaluated.""" + self._model_results[evaluable.name] = { "score": score.value, "badge": score.badge, - "pass": score.value >= self._config.fail_any_model_under, + "pass": score.value >= self._config.fail_any_evaluable_under, "results": {}, } for rule, result in results.items(): severity = rule.severity.name.lower() if result is None: - self._model_results[model.name]["results"][rule.source()] = { + 
self._model_results[evaluable.name]["results"][rule.source()] = { "result": "OK", "severity": severity, "message": None, } elif isinstance(result, RuleViolation): - self._model_results[model.name]["results"][rule.source()] = { + self._model_results[evaluable.name]["results"][rule.source()] = { "result": "WARN", "severity": severity, "message": result.message, } else: - self._model_results[model.name]["results"][rule.source()] = { + self._model_results[evaluable.name]["results"][rule.source()] = { "result": "ERR", "severity": severity, "message": str(result), diff --git a/src/dbt_score/formatters/manifest_formatter.py b/src/dbt_score/formatters/manifest_formatter.py index a1914cb..35356b8 100644 --- a/src/dbt_score/formatters/manifest_formatter.py +++ b/src/dbt_score/formatters/manifest_formatter.py @@ -4,9 +4,9 @@ import json from typing import Any -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Model +from dbt_score.models import Evaluable from dbt_score.scoring import Score @@ -15,19 +15,19 @@ class ManifestFormatter(Formatter): def __init__(self, *args: Any, **kwargs: Any) -> None: """Instantiate a manifest formatter.""" - self._model_scores: dict[str, Score] = {} + self._evaluable_scores: dict[str, Score] = {} super().__init__(*args, **kwargs) - def model_evaluated( - self, model: Model, results: ModelResultsType, score: Score + def evaluable_evaluated( + self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: - """Callback when a model has been evaluated.""" - self._model_scores[model.unique_id] = score + """Callback when an evaluable item has been evaluated.""" + self._evaluable_scores[evaluable.unique_id] = score def project_evaluated(self, score: Score) -> None: """Callback when a project has been evaluated.""" manifest = copy.copy(self._manifest_loader.raw_manifest) - for model_id, model_score in self._model_scores.items(): - manifest["nodes"][model_id]["meta"]["score"] = model_score.value - manifest["nodes"][model_id]["meta"]["badge"] = model_score.badge + for evaluable_id, evaluable_score in self._evaluable_scores.items(): + manifest["nodes"][evaluable_id]["meta"]["score"] = evaluable_score.value + manifest["nodes"][evaluable_id]["meta"]["badge"] = evaluable_score.badge print(json.dumps(manifest, indent=2)) diff --git a/src/dbt_score/scoring.py b/src/dbt_score/scoring.py index 5d80306..0d11f64 100644 --- a/src/dbt_score/scoring.py +++ b/src/dbt_score/scoring.py @@ -9,7 +9,7 @@ from dbt_score.config import Config if typing.TYPE_CHECKING: - from dbt_score.evaluation import ModelResultsType + from dbt_score.evaluation import EvaluableResultsType from dbt_score.rule import RuleViolation, Severity @@ -43,16 +43,16 @@ def __init__(self, config: Config) -> None: """Create a Scorer object.""" self._config = config - def score_model(self, model_results: ModelResultsType) -> Score: - """Compute the score of a given model.""" - rule_count = len(model_results) + def score_evaluable(self, evaluable_results: EvaluableResultsType) -> Score: + """Compute the score of a given evaluable.""" + rule_count = len(evaluable_results) if rule_count == 0: # No rule? 
No problem score = self.max_score elif any( rule.severity == Severity.CRITICAL and isinstance(result, RuleViolation) - for rule, result in model_results.items() + for rule, result in evaluable_results.items() ): # If there's a CRITICAL violation, the score is 0 score = self.min_score @@ -65,7 +65,7 @@ def score_model(self, model_results: ModelResultsType) -> Score: self.score_cardinality - rule.severity.value if isinstance(result, RuleViolation) # Either 0/3, 1/3 or 2/3 else self.score_cardinality # 3/3 - for rule, result in model_results.items() + for rule, result in evaluable_results.items() ] ) / (self.score_cardinality * rule_count) @@ -74,8 +74,8 @@ def score_model(self, model_results: ModelResultsType) -> Score: return Score(score, self._badge(score)) - def score_aggregate_models(self, scores: list[Score]) -> Score: - """Compute the score of a list of models.""" + def score_aggregate_evaluables(self, scores: list[Score]) -> Score: + """Compute the score of a list of evaluables.""" actual_scores = [s.value for s in scores] if 0.0 in actual_scores: # Any model with a CRITICAL violation makes the project score 0 diff --git a/tests/formatters/test_ascii_formatter.py b/tests/formatters/test_ascii_formatter.py index 2ab4b41..f7c7dc9 100644 --- a/tests/formatters/test_ascii_formatter.py +++ b/tests/formatters/test_ascii_formatter.py @@ -1,7 +1,7 @@ """Unit tests for the ASCII formatter.""" -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters.ascii_formatter import ASCIIFormatter from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -18,12 +18,12 @@ def test_ascii_formatter_model( ): """Ensure the formatter doesn't write anything after model evaluation.""" formatter = ASCIIFormatter(manifest_loader=manifest_loader, config=default_config) - results: ModelResultsType = { + results: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) stdout = capsys.readouterr().out assert stdout == "" diff --git a/tests/formatters/test_human_readable_formatter.py b/tests/formatters/test_human_readable_formatter.py index 6a3438a..1ff26e2 100644 --- a/tests/formatters/test_human_readable_formatter.py +++ b/tests/formatters/test_human_readable_formatter.py @@ -1,6 +1,6 @@ """Unit tests for the human readable formatter.""" -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters.human_readable_formatter import HumanReadableFormatter from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -19,12 +19,12 @@ def test_human_readable_formatter_model( formatter = HumanReadableFormatter( manifest_loader=manifest_loader, config=default_config ) - results: ModelResultsType = { + results: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) stdout = capsys.readouterr().out assert ( stdout @@ -60,12 +60,12 @@ def test_human_readable_formatter_near_perfect_model_score( formatter = HumanReadableFormatter( manifest_loader=manifest_loader, config=default_config ) - results: ModelResultsType = { 
+ results: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(9.99, "🥈")) + formatter.evaluable_evaluated(model1, results, Score(9.99, "🥈")) stdout = capsys.readouterr().out assert ( stdout @@ -101,10 +101,10 @@ def test_human_readable_formatter_low_model_score( formatter = HumanReadableFormatter( manifest_loader=manifest_loader, config=default_config ) - results: ModelResultsType = { + results: EvaluableResultsType = { rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(0.0, "🚧")) + formatter.evaluable_evaluated(model1, results, Score(0.0, "🚧")) formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out print() @@ -115,7 +115,7 @@ def test_human_readable_formatter_low_model_score( Project score: \x1B[1m0.0\x1B[0m 🚧 -Error: model score too low, fail_any_model_under = 5.0 +Error: evaluable score too low, fail_any_evaluable_under = 5.0 Model model1 scored 0.0 """ ) @@ -132,10 +132,10 @@ def test_human_readable_formatter_low_project_score( formatter = HumanReadableFormatter( manifest_loader=manifest_loader, config=default_config ) - results: ModelResultsType = { + results: EvaluableResultsType = { rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out print() diff --git a/tests/formatters/test_json_formatter.py b/tests/formatters/test_json_formatter.py index 1eec69c..aca8691 100644 --- a/tests/formatters/test_json_formatter.py +++ b/tests/formatters/test_json_formatter.py @@ -23,7 +23,7 @@ def test_json_formatter( rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) formatter.project_evaluated(Score(10.0, "🥇")) stdout = capsys.readouterr().out assert ( diff --git a/tests/formatters/test_manifest_formatter.py b/tests/formatters/test_manifest_formatter.py index baaacc8..e8d5527 100644 --- a/tests/formatters/test_manifest_formatter.py +++ b/tests/formatters/test_manifest_formatter.py @@ -2,7 +2,7 @@ import json -from dbt_score.evaluation import ModelResultsType +from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters.manifest_formatter import ManifestFormatter from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -21,12 +21,12 @@ def test_manifest_formatter_model( formatter = ManifestFormatter( manifest_loader=manifest_loader, config=default_config ) - results: ModelResultsType = { + results: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - formatter.model_evaluated(model1, results, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) stdout = capsys.readouterr().out assert stdout == "" @@ -45,19 +45,19 @@ def test_manifest_formatter_project( formatter = ManifestFormatter( manifest_loader=manifest_loader, config=default_config ) - result1: ModelResultsType = { + result1: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: Exception("Oh noes"), rule_severity_critical: RuleViolation("Error"), } - result2: ModelResultsType = { + 
result2: EvaluableResultsType = { rule_severity_low: None, rule_severity_medium: None, rule_severity_critical: None, } - formatter.model_evaluated(model1, result1, Score(5.0, "🚧")) - formatter.model_evaluated(model2, result2, Score(10.0, "🥇")) + formatter.evaluable_evaluated(model1, result1, Score(5.0, "🚧")) + formatter.evaluable_evaluated(model2, result2, Score(10.0, "🥇")) formatter.project_evaluated(Score(7.5, "🥉")) stdout = capsys.readouterr().out new_manifest = json.loads(stdout) diff --git a/tests/resources/pyproject.toml b/tests/resources/pyproject.toml index cdd6cdb..f8bcbfb 100644 --- a/tests/resources/pyproject.toml +++ b/tests/resources/pyproject.toml @@ -2,7 +2,7 @@ rule_namespaces = ["foo", "tests"] disabled_rules = ["foo.foo", "tests.bar"] fail_project_under = 7.5 -fail_any_model_under = 6.9 +fail_any_evaluable_under = 6.9 [tool.dbt-score.badges] wip.icon = "🏗️" diff --git a/tests/test_cli.py b/tests/test_cli.py index 69f68f0..c4f5d4e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -110,10 +110,10 @@ def test_fail_any_model_under(manifest_path): with patch("dbt_score.cli.Config._load_toml_file"): runner = CliRunner() result = runner.invoke( - lint, ["--manifest", manifest_path, "--fail_any_model_under", "10.0"] + lint, ["--manifest", manifest_path, "--fail_any_evaluable_under", "10.0"] ) assert "model1" in result.output assert "model2" in result.output - assert "Error: model score too low, fail_any_model_under" in result.stdout + assert "Error: evaluable score too low, fail_any_evaluable_under" in result.stdout assert result.exit_code == 1 diff --git a/tests/test_config.py b/tests/test_config.py index 634e99a..d8ea9da 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -26,7 +26,7 @@ def test_load_valid_toml_file(valid_config_path): assert config.badge_config.second.icon == "2️⃣" assert config.badge_config.first.icon == "1️⃣" assert config.fail_project_under == 7.5 - assert config.fail_any_model_under == 6.9 + assert config.fail_any_evaluable_under == 6.9 assert config.rules_config[ "tests.rules.example.rule_test_example" ].model_filter_names == ["tests.rules.example.skip_model1"] diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index f7349fd..1633b08 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -53,11 +53,11 @@ def test_evaluation_low_medium_high( assert isinstance(evaluation.results[model2][rule_severity_high], RuleViolation) assert isinstance(evaluation.results[model2][rule_error], Exception) - assert mock_formatter.model_evaluated.call_count == 5 + assert mock_formatter.evaluable_evaluated.call_count == 5 assert mock_formatter.project_evaluated.call_count == 1 - assert mock_scorer.score_model.call_count == 5 - assert mock_scorer.score_aggregate_models.call_count == 1 + assert mock_scorer.score_evaluable.call_count == 5 + assert mock_scorer.score_aggregate_evaluables.call_count == 1 def test_evaluation_critical( diff --git a/tests/test_scoring.py b/tests/test_scoring.py index e47a493..e355435 100644 --- a/tests/test_scoring.py +++ b/tests/test_scoring.py @@ -8,16 +8,16 @@ def test_scorer_model_no_results(default_config): """Test scorer with a model without any result.""" scorer = Scorer(config=default_config) - assert scorer.score_model({}).value == 10.0 + assert scorer.score_evaluable({}).value == 10.0 def test_scorer_model_severity_low(default_config, rule_severity_low): """Test scorer with a model and one low severity rule.""" scorer = Scorer(config=default_config) - assert 
scorer.score_model({rule_severity_low: None}).value == 10.0 - assert scorer.score_model({rule_severity_low: Exception()}).value == 10.0 + assert scorer.score_evaluable({rule_severity_low: None}).value == 10.0 + assert scorer.score_evaluable({rule_severity_low: Exception()}).value == 10.0 assert ( - round(scorer.score_model({rule_severity_low: RuleViolation("error")}).value, 2) + round(scorer.score_evaluable({rule_severity_low: RuleViolation("error")}).value, 2) == 6.67 ) @@ -25,11 +25,11 @@ def test_scorer_model_severity_low(default_config, rule_severity_low): def test_scorer_model_severity_medium(default_config, rule_severity_medium): """Test scorer with a model and one medium severity rule.""" scorer = Scorer(config=default_config) - assert scorer.score_model({rule_severity_medium: None}).value == 10.0 - assert scorer.score_model({rule_severity_medium: Exception()}).value == 10.0 + assert scorer.score_evaluable({rule_severity_medium: None}).value == 10.0 + assert scorer.score_evaluable({rule_severity_medium: Exception()}).value == 10.0 assert ( round( - scorer.score_model({rule_severity_medium: RuleViolation("error")}).value, 2 + scorer.score_evaluable({rule_severity_medium: RuleViolation("error")}).value, 2 ) == 3.33 ) @@ -38,18 +38,18 @@ def test_scorer_model_severity_medium(default_config, rule_severity_medium): def test_scorer_model_severity_high(default_config, rule_severity_high): """Test scorer with a model and one high severity rule.""" scorer = Scorer(config=default_config) - assert scorer.score_model({rule_severity_high: None}).value == 10.0 - assert scorer.score_model({rule_severity_high: Exception()}).value == 10.0 - assert scorer.score_model({rule_severity_high: RuleViolation("error")}).value == 0.0 + assert scorer.score_evaluable({rule_severity_high: None}).value == 10.0 + assert scorer.score_evaluable({rule_severity_high: Exception()}).value == 10.0 + assert scorer.score_evaluable({rule_severity_high: RuleViolation("error")}).value == 0.0 def test_scorer_model_severity_critical(default_config, rule_severity_critical): """Test scorer with a model and one critical severity rule.""" scorer = Scorer(config=default_config) - assert scorer.score_model({rule_severity_critical: None}).value == 10.0 - assert scorer.score_model({rule_severity_critical: Exception()}).value == 10.0 + assert scorer.score_evaluable({rule_severity_critical: None}).value == 10.0 + assert scorer.score_evaluable({rule_severity_critical: Exception()}).value == 10.0 assert ( - scorer.score_model({rule_severity_critical: RuleViolation("error")}).value + scorer.score_evaluable({rule_severity_critical: RuleViolation("error")}).value == 0.0 ) @@ -60,7 +60,7 @@ def test_scorer_model_severity_critical_overwrites( """Test scorer with a model and multiple rules including one critical.""" scorer = Scorer(config=default_config) assert ( - scorer.score_model( + scorer.score_evaluable( {rule_severity_low: None, rule_severity_critical: RuleViolation("error")} ).value == 0.0 @@ -74,7 +74,7 @@ def test_scorer_model_multiple_rules( scorer = Scorer(config=default_config) assert ( round( - scorer.score_model( + scorer.score_evaluable( { rule_severity_low: None, rule_severity_medium: Exception(), @@ -88,7 +88,7 @@ def test_scorer_model_multiple_rules( assert ( round( - scorer.score_model( + scorer.score_evaluable( { rule_severity_low: Exception(), rule_severity_medium: RuleViolation("error"), @@ -102,7 +102,7 @@ def test_scorer_model_multiple_rules( assert ( round( - scorer.score_model( + scorer.score_evaluable( { 
rule_severity_low: RuleViolation("error"), rule_severity_medium: Exception(), @@ -118,39 +118,39 @@ def test_scorer_model_multiple_rules( def test_scorer_aggregate_empty(default_config): """Test scorer aggregation with no results.""" scorer = Scorer(config=default_config) - assert scorer.score_aggregate_models([]).value == 10.0 + assert scorer.score_aggregate_evaluables([]).value == 10.0 def test_scorer_aggregate_with_0(default_config): """Test scorer aggregation with one result that is 0.0.""" scorer = Scorer(config=default_config) scores = [Score(1.0, ""), Score(5.0, ""), Score(0.0, "")] - assert scorer.score_aggregate_models(scores).value == 0.0 + assert scorer.score_aggregate_evaluables(scores).value == 0.0 def test_scorer_aggregate_single(default_config): """Test scorer aggregation with a single results.""" scorer = Scorer(config=default_config) - assert scorer.score_aggregate_models([Score(4.2, "")]).value == 4.2 + assert scorer.score_aggregate_evaluables([Score(4.2, "")]).value == 4.2 def test_scorer_aggregate_multiple(default_config): """Test scorer aggregation with multiple results.""" scorer = Scorer(config=default_config) assert ( - scorer.score_aggregate_models( + scorer.score_aggregate_evaluables( [Score(1.0, ""), Score(1.0, ""), Score(1.0, "")] ).value == 1.0 ) assert ( - scorer.score_aggregate_models( + scorer.score_aggregate_evaluables( [Score(1.0, ""), Score(7.4, ""), Score(4.2, "")] ).value == 4.2 ) assert ( - scorer.score_aggregate_models( + scorer.score_aggregate_evaluables( [Score(0.0, ""), Score(0.0, ""), Score(0.0, "")] ).value == 0.0 From 213f06568b188f42edc1e95c1cd08e8a539ff59c Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Sat, 19 Oct 2024 19:11:58 -0400 Subject: [PATCH 12/41] refactor model_filter as rule_filter --- src/dbt_score/__init__.py | 6 ++-- src/dbt_score/rule.py | 36 +++++++++---------- .../{model_filter.py => rule_filter.py} | 36 +++++++++---------- src/dbt_score/rule_registry.py | 32 ++++++++--------- tests/conftest.py | 18 +++++----- tests/resources/pyproject.toml | 2 +- tests/rules/example.py | 4 +-- tests/test_config.py | 2 +- tests/test_model_filter.py | 16 ++++----- tests/test_rule_registry.py | 2 +- 10 files changed, 77 insertions(+), 77 deletions(-) rename src/dbt_score/{model_filter.py => rule_filter.py} (80%) diff --git a/src/dbt_score/__init__.py b/src/dbt_score/__init__.py index 5dcd389..9a8aa59 100644 --- a/src/dbt_score/__init__.py +++ b/src/dbt_score/__init__.py @@ -1,16 +1,16 @@ """Init dbt_score package.""" -from dbt_score.model_filter import ModelFilter, model_filter +from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import Model, Source from dbt_score.rule import Rule, RuleViolation, Severity, rule __all__ = [ "Model", "Source", - "ModelFilter", + "RuleFilter", "Rule", "RuleViolation", "Severity", - "model_filter", + "rule_filter", "rule", ] diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 8daaf9c..6f60ea1 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -8,7 +8,7 @@ from more_itertools import first_true -from dbt_score.model_filter import ModelFilter +from dbt_score.rule_filter import RuleFilter from dbt_score.models import Evaluable @@ -27,7 +27,7 @@ class RuleConfig: severity: Severity | None = None config: dict[str, Any] = field(default_factory=dict) - model_filter_names: list[str] = field(default_factory=list) + rule_filter_names: list[str] = field(default_factory=list) @staticmethod def from_dict(rule_config: dict[str, Any]) -> "RuleConfig": @@ -39,13 +39,13 @@ 
def from_dict(rule_config: dict[str, Any]) -> "RuleConfig": else None ) filter_names = ( - config.pop("model_filter_names", None) - if "model_filter_names" in rule_config + config.pop("rule_filter_names", None) + if "rule_filter_names" in rule_config else [] ) return RuleConfig( - severity=severity, config=config, model_filter_names=filter_names + severity=severity, config=config, rule_filter_names=filter_names ) @@ -64,8 +64,8 @@ class Rule: description: str severity: Severity = Severity.MEDIUM - model_filter_names: list[str] - model_filters: frozenset[ModelFilter] = frozenset() + rule_filter_names: list[str] + rule_filters: frozenset[RuleFilter] = frozenset() default_config: typing.ClassVar[dict[str, Any]] = {} resource_type: typing.ClassVar[Evaluable] @@ -116,18 +116,18 @@ def process_config(self, rule_config: RuleConfig) -> None: self.set_severity( rule_config.severity ) if rule_config.severity else rule_config.severity - self.model_filter_names = rule_config.model_filter_names + self.rule_filter_names = rule_config.rule_filter_names self.config = config - def evaluate(self, model: Evaluable) -> RuleViolation | None: + def evaluate(self, evaluable: Evaluable) -> RuleViolation | None: """Evaluates the rule.""" raise NotImplementedError("Subclass must implement method `evaluate`.") @classmethod - def should_evaluate(cls, model: Evaluable) -> bool: + def should_evaluate(cls, evaluable: Evaluable) -> bool: """Checks if all filters in the rule allow evaluation.""" - if cls.model_filters: - return all(f.evaluate(model) for f in cls.model_filters) + if cls.rule_filters: + return all(f.evaluate(evaluable) for f in cls.rule_filters) return True @classmethod @@ -136,9 +136,9 @@ def set_severity(cls, severity: Severity) -> None: cls.severity = severity @classmethod - def set_filters(cls, model_filters: Iterable[ModelFilter]) -> None: + def set_filters(cls, rule_filters: Iterable[RuleFilter]) -> None: """Set the filters of the rule.""" - cls.model_filters = frozenset(model_filters) + cls.rule_filters = frozenset(rule_filters) @classmethod def source(cls) -> str: @@ -164,7 +164,7 @@ def rule( *, description: str | None = None, severity: Severity = Severity.MEDIUM, - model_filters: set[ModelFilter] | None = None, + rule_filters: set[RuleFilter] | None = None, ) -> Callable[[RuleEvaluationType], Type[Rule]]: ... @@ -174,7 +174,7 @@ def rule( *, description: str | None = None, severity: Severity = Severity.MEDIUM, - model_filters: set[ModelFilter] | None = None, + rule_filters: set[RuleFilter] | None = None, ) -> Type[Rule] | Callable[[RuleEvaluationType], Type[Rule]]: """Rule decorator. @@ -188,7 +188,7 @@ def rule( __func: The rule evaluation function being decorated. description: The description of the rule. severity: The severity of the rule. - model_filters: Set of ModelFilter that filters the rule. + rule_filters: Set of RuleFilter that filters the items that the rule applies to. 
""" def decorator_rule( @@ -221,7 +221,7 @@ def wrapped_func(self: Rule, *args: Any, **kwargs: Any) -> RuleViolation | None: { "description": rule_description, "severity": severity, - "model_filters": model_filters or frozenset(), + "rule_filters": rule_filters or frozenset(), "default_config": default_config, "evaluate": wrapped_func, # Save provided evaluate function diff --git a/src/dbt_score/model_filter.py b/src/dbt_score/rule_filter.py similarity index 80% rename from src/dbt_score/model_filter.py rename to src/dbt_score/rule_filter.py index 330f81e..cf859d2 100644 --- a/src/dbt_score/model_filter.py +++ b/src/dbt_score/rule_filter.py @@ -10,7 +10,7 @@ FilterEvaluationType: TypeAlias = Callable[[Evaluable], bool] -class ModelFilter: +class RuleFilter: """The Filter base class.""" description: str @@ -45,7 +45,7 @@ def _introspect_resource_type(cls) -> Type[Evaluable]: return resource_type_argument.annotation - def evaluate(self, model: Evaluable) -> bool: + def evaluate(self, evaluable: Evaluable) -> bool: """Evaluates the filter.""" raise NotImplementedError("Subclass must implement method `evaluate`.") @@ -64,31 +64,31 @@ def __hash__(self) -> int: @overload -def model_filter(__func: FilterEvaluationType) -> Type[ModelFilter]: +def rule_filter(__func: FilterEvaluationType) -> Type[RuleFilter]: ... @overload -def model_filter( +def rule_filter( *, description: str | None = None, -) -> Callable[[FilterEvaluationType], Type[ModelFilter]]: +) -> Callable[[FilterEvaluationType], Type[RuleFilter]]: ... -def model_filter( +def rule_filter( __func: FilterEvaluationType | None = None, *, description: str | None = None, -) -> Type[ModelFilter] | Callable[[FilterEvaluationType], Type[ModelFilter]]: - """Model-filter decorator. +) -> Type[RuleFilter] | Callable[[FilterEvaluationType], Type[RuleFilter]]: + """Rule-filter decorator. - The model-filter decorator creates a filter class (subclass of ModelFilter) + The rule_filter decorator creates a filter class (subclass of RuleFilter) and returns it. Using arguments or not are both supported: - - ``@model_filter`` - - ``@model_filter(description="...")`` + - ``@rule_filter`` + - ``@rule_filter(description="...")`` Args: __func: The filter evaluation function being decorated. @@ -97,11 +97,11 @@ def model_filter( def decorator_filter( func: FilterEvaluationType, - ) -> Type[ModelFilter]: + ) -> Type[RuleFilter]: """Decorator function.""" if func.__doc__ is None and description is None: raise AttributeError( - "ModelFilter must define `description` or `func.__doc__`." + "RuleFilter must define `description` or `func.__doc__`." ) # Get description parameter, otherwise use the docstring @@ -109,14 +109,14 @@ def decorator_filter( func.__doc__.split("\n")[0] if func.__doc__ else None ) - def wrapped_func(self: ModelFilter, *args: Any, **kwargs: Any) -> bool: + def wrapped_func(self: RuleFilter, *args: Any, **kwargs: Any) -> bool: """Wrap func to add `self`.""" return func(*args, **kwargs) - # Create the filter class inheriting from ModelFilter + # Create the filter class inheriting from RuleFilter filter_class = type( func.__name__, - (ModelFilter,), + (RuleFilter,), { "description": filter_description, "evaluate": wrapped_func, @@ -131,8 +131,8 @@ def wrapped_func(self: ModelFilter, *args: Any, **kwargs: Any) -> bool: return filter_class if __func is not None: - # The syntax @model_filter is used + # The syntax @rule_filter is used return decorator_filter(__func) else: - # The syntax @model_filter(...) is used + # The syntax @rule_filter(...) 
is used return decorator_filter diff --git a/src/dbt_score/rule_registry.py b/src/dbt_score/rule_registry.py index 0e4557a..da4cf56 100644 --- a/src/dbt_score/rule_registry.py +++ b/src/dbt_score/rule_registry.py @@ -12,7 +12,7 @@ from dbt_score.config import Config from dbt_score.exceptions import DuplicatedRuleException -from dbt_score.model_filter import ModelFilter +from dbt_score.rule_filter import RuleFilter from dbt_score.rule import Rule, RuleConfig logger = logging.getLogger(__name__) @@ -25,7 +25,7 @@ def __init__(self, config: Config) -> None: """Instantiate a rule registry.""" self.config = config self._rules: dict[str, Rule] = {} - self._model_filters: dict[str, ModelFilter] = {} + self._rule_filters: dict[str, RuleFilter] = {} @property def rules(self) -> dict[str, Rule]: @@ -33,9 +33,9 @@ def rules(self) -> dict[str, Rule]: return self._rules @property - def model_filters(self) -> dict[str, ModelFilter]: + def rule_filters(self) -> dict[str, RuleFilter]: """Get all filters.""" - return self._model_filters + return self._rule_filters def _walk_packages(self, namespace_name: str) -> Iterator[str]: """Walk packages and sub-packages recursively.""" @@ -66,8 +66,8 @@ def _load(self, namespace_name: str) -> None: self._add_rule(obj) if ( type(obj) is type - and issubclass(obj, ModelFilter) - and obj is not ModelFilter + and issubclass(obj, RuleFilter) + and obj is not RuleFilter ): self._add_filter(obj) @@ -80,12 +80,12 @@ def _add_rule(self, rule: Type[Rule]) -> None: rule_config = self.config.rules_config.get(rule_name, RuleConfig()) self._rules[rule_name] = rule(rule_config=rule_config) - def _add_filter(self, model_filter: Type[ModelFilter]) -> None: + def _add_filter(self, rule_filter: Type[RuleFilter]) -> None: """Initialize and add a filter.""" - filter_name = model_filter.source() - if filter_name in self._model_filters: + filter_name = rule_filter.source() + if filter_name in self._rule_filters: raise DuplicatedRuleException(filter_name) - self._model_filters[filter_name] = model_filter() + self._rule_filters[filter_name] = rule_filter() def load_all(self) -> None: """Load all rules, core and third-party.""" @@ -103,17 +103,17 @@ def load_all(self) -> None: self._load_filters_into_rules() def _load_filters_into_rules(self) -> None: - """Loads ModelFilters into Rule objects. + """Loads RuleFilters into Rule objects. - If the config of the rule has filter names in the `model_filter_names` key, - load those filters from the rule registry into the actual `model_filters` field. + If the config of the rule has filter names in the `rule_filter_names` key, + load those filters from the rule registry into the actual `rule_filters` field. Configuration overwrites any pre-existing filters. 
""" for rule in self._rules.values(): - filter_names: list[str] = rule.model_filter_names or [] + filter_names: list[str] = rule.rule_filter_names or [] if len(filter_names) > 0: rule.set_filters( - model_filter - for name, model_filter in self.model_filters.items() + rule_filter + for name, rule_filter in self.rule_filters.items() if name in filter_names ) diff --git a/tests/conftest.py b/tests/conftest.py index 870e1df..49d8703 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,7 +6,7 @@ from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule from dbt_score.config import Config -from dbt_score.model_filter import ModelFilter, model_filter +from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import ManifestLoader from pytest import fixture @@ -291,12 +291,12 @@ def rule_error(model: Model) -> RuleViolation | None: def model_rule_with_filter() -> Type[Rule]: """An example rule that skips through a filter.""" - @model_filter + @rule_filter def skip_model1(model: Model) -> bool: """Skips for model1, passes for model2.""" return model.name != "model1" - @rule(model_filters={skip_model1()}) + @rule(rule_filters={skip_model1()}) def model_rule_with_filter(model: Model) -> RuleViolation | None: """Rule that always fails when not filtered.""" return RuleViolation(message="I always fail.") @@ -308,12 +308,12 @@ def model_rule_with_filter(model: Model) -> RuleViolation | None: def source_rule_with_filter() -> Type[Rule]: """An example rule that skips through a filter.""" - @model_filter + @rule_filter def skip_source1(source: Source) -> bool: """Skips for source1, passes for source2.""" return source.name != "table1" - @rule(model_filters={skip_source1()}) + @rule(rule_filters={skip_source1()}) def source_rule_with_filter(source: Source) -> RuleViolation | None: """Rule that always fails when not filtered.""" return RuleViolation(message="I always fail.") @@ -325,7 +325,7 @@ def source_rule_with_filter(source: Source) -> RuleViolation | None: def model_class_rule_with_filter() -> Type[Rule]: """Using class definitions for filters and rules.""" - class SkipModel1(ModelFilter): + class SkipModel1(RuleFilter): description = "Filter defined by a class." def evaluate(self, model: Model) -> bool: @@ -334,7 +334,7 @@ def evaluate(self, model: Model) -> bool: class ModelRuleWithFilter(Rule): description = "Filter defined by a class." - model_filters = frozenset({SkipModel1()}) + rule_filters = frozenset({SkipModel1()}) def evaluate(self, model: Model) -> RuleViolation | None: return RuleViolation(message="I always fail.") @@ -346,7 +346,7 @@ def evaluate(self, model: Model) -> RuleViolation | None: def source_class_rule_with_filter() -> Type[Rule]: """Using class definitions for filters and rules.""" - class SkipSource1(ModelFilter): + class SkipSource1(RuleFilter): description = "Filter defined by a class." def evaluate(self, source: Source) -> bool: @@ -355,7 +355,7 @@ def evaluate(self, source: Source) -> bool: class SourceRuleWithFilter(Rule): description = "Filter defined by a class." 
- model_filters = frozenset({SkipSource1()}) + rule_filters = frozenset({SkipSource1()}) def evaluate(self, source: Source) -> RuleViolation | None: return RuleViolation(message="I always fail.") diff --git a/tests/resources/pyproject.toml b/tests/resources/pyproject.toml index f8bcbfb..b8edcba 100644 --- a/tests/resources/pyproject.toml +++ b/tests/resources/pyproject.toml @@ -25,4 +25,4 @@ model_name="model2" [tool.dbt-score.rules."tests.rules.example.rule_test_example"] severity=4 -model_filter_names=["tests.rules.example.skip_model1"] +rule_filter_names=["tests.rules.example.skip_model1"] diff --git a/tests/rules/example.py b/tests/rules/example.py index 7c383ac..680e4d8 100644 --- a/tests/rules/example.py +++ b/tests/rules/example.py @@ -1,6 +1,6 @@ """Example rules.""" -from dbt_score import Model, RuleViolation, model_filter, rule +from dbt_score import Model, RuleViolation, rule_filter, rule @rule() @@ -8,7 +8,7 @@ def rule_test_example(model: Model) -> RuleViolation | None: """An example rule.""" -@model_filter +@rule_filter def skip_model1(model: Model) -> bool: """An example filter.""" return model.name != "model1" diff --git a/tests/test_config.py b/tests/test_config.py index d8ea9da..f45086b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -29,7 +29,7 @@ def test_load_valid_toml_file(valid_config_path): assert config.fail_any_evaluable_under == 6.9 assert config.rules_config[ "tests.rules.example.rule_test_example" - ].model_filter_names == ["tests.rules.example.skip_model1"] + ].rule_filter_names == ["tests.rules.example.skip_model1"] def test_load_invalid_toml_file(caplog, invalid_config_path): diff --git a/tests/test_model_filter.py b/tests/test_model_filter.py index 6ba1256..31fbb47 100644 --- a/tests/test_model_filter.py +++ b/tests/test_model_filter.py @@ -1,13 +1,13 @@ """Test model filters.""" import pytest -from dbt_score.model_filter import ModelFilter, model_filter +from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import Model, Source def test_basic_filter(model1, model2): """Test basic filter testing for a specific model.""" - @model_filter + @rule_filter def only_model1(model: Model) -> bool: """Some description.""" return model.name == "model1" @@ -21,7 +21,7 @@ def only_model1(model: Model) -> bool: def test_basic_filter_with_sources(source1, source2): """Test basic filter testing for a specific model.""" - @model_filter + @rule_filter def only_source1(source: Source) -> bool: """Some description.""" return source.name == "table1" @@ -35,7 +35,7 @@ def only_source1(source: Source) -> bool: def test_class_filter(model1, model2): """Test basic filter using class.""" - class OnlyModel1(ModelFilter): + class OnlyModel1(RuleFilter): description = "Some description." def evaluate(self, model: Model) -> bool: @@ -50,7 +50,7 @@ def evaluate(self, model: Model) -> bool: def test_class_filter_with_sources(source1, source2): """Test basic filter using class.""" - class OnlySource1(ModelFilter): + class OnlySource1(RuleFilter): description = "Some description." 
def evaluate(self, source: Source) -> bool: @@ -66,7 +66,7 @@ def test_missing_description_rule_filter(): """Test missing description in filter decorator.""" with pytest.raises(AttributeError): - @model_filter() + @rule_filter() def example_filter(model: Model) -> bool: return True @@ -75,7 +75,7 @@ def test_missing_description_rule_class(): """Test missing description in filter class.""" with pytest.raises(AttributeError): - class BadFilter(ModelFilter): + class BadFilter(RuleFilter): """Bad example filter.""" def evaluate(self, model: Model) -> bool: @@ -87,7 +87,7 @@ def test_missing_evaluate_rule_class(model1): """Test missing evaluate implementation in filter class.""" with pytest.raises(TypeError): - class BadFilter(ModelFilter): + class BadFilter(RuleFilter): """Bad example filter.""" description = "Description of the rule." diff --git a/tests/test_rule_registry.py b/tests/test_rule_registry.py index 4e7abb2..8898a94 100644 --- a/tests/test_rule_registry.py +++ b/tests/test_rule_registry.py @@ -15,7 +15,7 @@ def test_rule_registry_discovery(default_config): "tests.rules.example.rule_test_example", "tests.rules.nested.example.rule_test_nested_example", ] - assert list(r._model_filters.keys()) == ["tests.rules.example.skip_model1"] + assert list(r._rule_filters.keys()) == ["tests.rules.example.skip_model1"] def test_disabled_rule_registry_discovery(): From 713bdee7d3abcdcbc9dd70facb92c294a7a6d76d Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Sat, 19 Oct 2024 19:12:25 -0400 Subject: [PATCH 13/41] fmt --- src/dbt_score/__init__.py | 2 +- src/dbt_score/cli.py | 5 ++++- src/dbt_score/evaluation.py | 8 ++++++-- src/dbt_score/formatters/__init__.py | 2 +- .../formatters/human_readable_formatter.py | 6 ++++-- src/dbt_score/rule.py | 2 +- src/dbt_score/rule_registry.py | 2 +- tests/conftest.py | 2 +- tests/rules/example.py | 2 +- tests/test_cli.py | 4 +++- tests/test_model_filter.py | 2 +- tests/test_scoring.py | 14 +++++++++++--- 12 files changed, 35 insertions(+), 16 deletions(-) diff --git a/src/dbt_score/__init__.py b/src/dbt_score/__init__.py index 9a8aa59..2cbe499 100644 --- a/src/dbt_score/__init__.py +++ b/src/dbt_score/__init__.py @@ -1,8 +1,8 @@ """Init dbt_score package.""" -from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import Model, Source from dbt_score.rule import Rule, RuleViolation, Severity, rule +from dbt_score.rule_filter import RuleFilter, rule_filter __all__ = [ "Model", diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index ac48a42..f11aa4c 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -148,7 +148,10 @@ def lint( ctx.exit(2) if ( - any(x.value < config.fail_any_evaluable_under for x in evaluation.scores.values()) + any( + x.value < config.fail_any_evaluable_under + for x in evaluation.scores.values() + ) or evaluation.project_score.value < config.fail_project_under ): ctx.exit(1) diff --git a/src/dbt_score/evaluation.py b/src/dbt_score/evaluation.py index 861b616..6ff227b 100644 --- a/src/dbt_score/evaluation.py +++ b/src/dbt_score/evaluation.py @@ -54,7 +54,9 @@ def evaluate(self) -> None: """Evaluate all rules.""" rules = self._rule_registry.rules.values() - for evaluable in chain(self._manifest_loader.models, self._manifest_loader.sources): + for evaluable in chain( + self._manifest_loader.models, self._manifest_loader.sources + ): self.results[evaluable] = {} for rule in rules: try: @@ -66,7 +68,9 @@ def evaluate(self) -> None: except Exception as e: self.results[evaluable][rule.__class__] = e - 
self.scores[evaluable] = self._scorer.score_evaluable(self.results[evaluable]) + self.scores[evaluable] = self._scorer.score_evaluable( + self.results[evaluable] + ) self._formatter.evaluable_evaluated( evaluable, self.results[evaluable], self.scores[evaluable] ) diff --git a/src/dbt_score/formatters/__init__.py b/src/dbt_score/formatters/__init__.py index d9f6d46..f3a7aa0 100644 --- a/src/dbt_score/formatters/__init__.py +++ b/src/dbt_score/formatters/__init__.py @@ -10,7 +10,7 @@ if typing.TYPE_CHECKING: from dbt_score.evaluation import EvaluableResultsType -from dbt_score.models import ManifestLoader, Model, Evaluable +from dbt_score.models import Evaluable, ManifestLoader class Formatter(ABC): diff --git a/src/dbt_score/formatters/human_readable_formatter.py b/src/dbt_score/formatters/human_readable_formatter.py index 58f9b1d..7d182a0 100644 --- a/src/dbt_score/formatters/human_readable_formatter.py +++ b/src/dbt_score/formatters/human_readable_formatter.py @@ -4,7 +4,7 @@ from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Model, Evaluable +from dbt_score.models import Evaluable from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -33,7 +33,9 @@ def evaluable_evaluated( """Callback when an evaluable item has been evaluated.""" if score.value < self._config.fail_any_evaluable_under: self._failed_evaluables.append((evaluable, score)) - print(f"{score.badge} {self.bold(evaluable.name)} (score: {score.rounded_value!s})") + print( + f"{score.badge} {self.bold(evaluable.name)} (score: {score.rounded_value!s})" + ) for rule, result in results.items(): if result is None: print(f"{self.indent}{self.label_ok} {rule.source()}") diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 6f60ea1..b1e5a99 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -8,8 +8,8 @@ from more_itertools import first_true -from dbt_score.rule_filter import RuleFilter from dbt_score.models import Evaluable +from dbt_score.rule_filter import RuleFilter class Severity(Enum): diff --git a/src/dbt_score/rule_registry.py b/src/dbt_score/rule_registry.py index da4cf56..f60a87e 100644 --- a/src/dbt_score/rule_registry.py +++ b/src/dbt_score/rule_registry.py @@ -12,8 +12,8 @@ from dbt_score.config import Config from dbt_score.exceptions import DuplicatedRuleException -from dbt_score.rule_filter import RuleFilter from dbt_score.rule import Rule, RuleConfig +from dbt_score.rule_filter import RuleFilter logger = logging.getLogger(__name__) diff --git a/tests/conftest.py b/tests/conftest.py index 49d8703..9022730 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,8 +6,8 @@ from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule from dbt_score.config import Config -from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import ManifestLoader +from dbt_score.rule_filter import RuleFilter, rule_filter from pytest import fixture # Configuration diff --git a/tests/rules/example.py b/tests/rules/example.py index 680e4d8..11b2f95 100644 --- a/tests/rules/example.py +++ b/tests/rules/example.py @@ -1,6 +1,6 @@ """Example rules.""" -from dbt_score import Model, RuleViolation, rule_filter, rule +from dbt_score import Model, RuleViolation, rule, rule_filter @rule() diff --git a/tests/test_cli.py b/tests/test_cli.py index c4f5d4e..5229108 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -115,5 +115,7 @@ def test_fail_any_model_under(manifest_path): assert 
"model1" in result.output assert "model2" in result.output - assert "Error: evaluable score too low, fail_any_evaluable_under" in result.stdout + assert ( + "Error: evaluable score too low, fail_any_evaluable_under" in result.stdout + ) assert result.exit_code == 1 diff --git a/tests/test_model_filter.py b/tests/test_model_filter.py index 31fbb47..dcb2b7e 100644 --- a/tests/test_model_filter.py +++ b/tests/test_model_filter.py @@ -1,7 +1,7 @@ """Test model filters.""" import pytest -from dbt_score.rule_filter import RuleFilter, rule_filter from dbt_score.models import Model, Source +from dbt_score.rule_filter import RuleFilter, rule_filter def test_basic_filter(model1, model2): diff --git a/tests/test_scoring.py b/tests/test_scoring.py index e355435..356992c 100644 --- a/tests/test_scoring.py +++ b/tests/test_scoring.py @@ -17,7 +17,9 @@ def test_scorer_model_severity_low(default_config, rule_severity_low): assert scorer.score_evaluable({rule_severity_low: None}).value == 10.0 assert scorer.score_evaluable({rule_severity_low: Exception()}).value == 10.0 assert ( - round(scorer.score_evaluable({rule_severity_low: RuleViolation("error")}).value, 2) + round( + scorer.score_evaluable({rule_severity_low: RuleViolation("error")}).value, 2 + ) == 6.67 ) @@ -29,7 +31,10 @@ def test_scorer_model_severity_medium(default_config, rule_severity_medium): assert scorer.score_evaluable({rule_severity_medium: Exception()}).value == 10.0 assert ( round( - scorer.score_evaluable({rule_severity_medium: RuleViolation("error")}).value, 2 + scorer.score_evaluable( + {rule_severity_medium: RuleViolation("error")} + ).value, + 2, ) == 3.33 ) @@ -40,7 +45,10 @@ def test_scorer_model_severity_high(default_config, rule_severity_high): scorer = Scorer(config=default_config) assert scorer.score_evaluable({rule_severity_high: None}).value == 10.0 assert scorer.score_evaluable({rule_severity_high: Exception()}).value == 10.0 - assert scorer.score_evaluable({rule_severity_high: RuleViolation("error")}).value == 0.0 + assert ( + scorer.score_evaluable({rule_severity_high: RuleViolation("error")}).value + == 0.0 + ) def test_scorer_model_severity_critical(default_config, rule_severity_critical): From aa9371d204ab99f0ae8e2d830a06830147444e88 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Sat, 19 Oct 2024 19:20:37 -0400 Subject: [PATCH 14/41] update dbt_ls cmd to include sources --- src/dbt_score/dbt_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbt_score/dbt_utils.py b/src/dbt_score/dbt_utils.py index f6ffc5b..fa89938 100644 --- a/src/dbt_score/dbt_utils.py +++ b/src/dbt_score/dbt_utils.py @@ -69,7 +69,7 @@ def dbt_parse() -> "dbtRunnerResult": @dbt_required def dbt_ls(select: Iterable[str] | None) -> Iterable[str]: """Run dbt ls.""" - cmd = ["ls", "--resource-type", "model", "--output", "name"] + cmd = ["ls", "--resource-types", "model", "source", "--output", "name"] if select: cmd += ["--select", *select] From bb2ff9bc1853783f944f9860f71e0cd472818fc0 Mon Sep 17 00:00:00 2001 From: Oliver Tosky <42260747+otosky@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:23:21 -0400 Subject: [PATCH 15/41] add newline to pyproject.toml Co-authored-by: Jochem van Dooren --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3f36430..651019a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,4 +116,4 @@ show_missing = true fail_under = 80 exclude_also = [ "@overload" -] \ No newline at end of file +] From 
68453dcddd87994382e136a2e781710749f14ab2 Mon Sep 17 00:00:00 2001 From: Oliver Tosky <42260747+otosky@users.noreply.github.com> Date: Mon, 21 Oct 2024 23:24:02 -0400 Subject: [PATCH 16/41] fix docstring Co-authored-by: Jochem van Dooren --- src/dbt_score/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index f11aa4c..5d3bbd3 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -106,7 +106,7 @@ def lint( fail_project_under: float, fail_any_evaluable_under: float, ) -> None: - """Lint dbt models metadata.""" + """Lint dbt metadata.""" manifest_provided = ( click.get_current_context().get_parameter_source("manifest") != ParameterSource.DEFAULT From feedc7612696ed648b62299bcb36c8654deec037 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 10:26:14 -0400 Subject: [PATCH 17/41] update some docstrings for naming changes --- src/dbt_score/rule_filter.py | 2 +- src/dbt_score/scoring.py | 2 +- tests/test_model_filter.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index cf859d2..c795759 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -1,4 +1,4 @@ -"""Model filtering to choose when to apply specific rules.""" +"""Evaluable filtering to choose when to apply specific rules.""" import inspect import typing from typing import Any, Callable, Type, TypeAlias, overload diff --git a/src/dbt_score/scoring.py b/src/dbt_score/scoring.py index 0d11f64..bcf8efa 100644 --- a/src/dbt_score/scoring.py +++ b/src/dbt_score/scoring.py @@ -78,7 +78,7 @@ def score_aggregate_evaluables(self, scores: list[Score]) -> Score: """Compute the score of a list of evaluables.""" actual_scores = [s.value for s in scores] if 0.0 in actual_scores: - # Any model with a CRITICAL violation makes the project score 0 + # Any evaluable with a CRITICAL violation makes the project score 0 score = Score(self.min_score, self._badge(self.min_score)) elif len(actual_scores) == 0: score = Score(self.max_score, self._badge(self.max_score)) diff --git a/tests/test_model_filter.py b/tests/test_model_filter.py index dcb2b7e..51715ea 100644 --- a/tests/test_model_filter.py +++ b/tests/test_model_filter.py @@ -1,4 +1,4 @@ -"""Test model filters.""" +"""Test rule filters.""" import pytest from dbt_score.models import Model, Source from dbt_score.rule_filter import RuleFilter, rule_filter @@ -19,7 +19,7 @@ def only_model1(model: Model) -> bool: def test_basic_filter_with_sources(source1, source2): - """Test basic filter testing for a specific model.""" + """Test basic filter testing for a specific source.""" @rule_filter def only_source1(source: Source) -> bool: From 91fe1ec96e22d32f64269e3b7f5034846b95c741 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 11:07:39 -0400 Subject: [PATCH 18/41] update pyproject description --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 651019a..9949784 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "pdm.backend" name = "dbt-score" dynamic = ["version"] -description = "Linter for dbt model metadata." +description = "Linter for dbt metadata." 
authors = [ {name = "Picnic Analyst Development Platform", email = "analyst-development-platform@teampicnic.com"} ] From 0fff2fc04695528edc972b9de51bba33a1852fd9 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 11:37:57 -0400 Subject: [PATCH 19/41] update manifest filter --- src/dbt_score/models.py | 25 ++++++++++++++++++------- tests/test_models.py | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index c03815c..070dc44 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -72,7 +72,7 @@ def from_node(cls, test_node: dict[str, Any]) -> "Test": @dataclass class Column: - """Represents a column in a model. + """Represents a column. Attributes: name: The name of the column. @@ -318,6 +318,16 @@ class Source(HasColumnsMixin): _raw_values: dict[str, Any] = field(default_factory=dict) _raw_test_values: list[dict[str, Any]] = field(default_factory=list) + @property + def selector_name(self) -> str: + """Returns the name used by the dbt `source` method selector. + + Note: This is also the format output by `dbt ls --output name` for sources. + + https://docs.getdbt.com/reference/node-selection/methods#the-source-method + """ + return f"{self.source_name}.{self.name}" + @classmethod def from_node( cls, node_values: dict[str, Any], test_values: list[dict[str, Any]] @@ -391,10 +401,10 @@ def __init__(self, file_path: Path, select: Iterable[str] | None = None): self._load_sources() if select: - self._select_models(select) + self._filter_evaluables(select) - if len(self.models) == 0: - logger.warning("No model found.") + if (len(self.models) + len(self.sources)) == 0: + logger.warning("Nothing to evaluate!") def _load_models(self) -> None: """Load the models from the manifest.""" @@ -419,8 +429,8 @@ def _reindex_tests(self) -> None: ): self.tests[attached_node].append(node_values) - def _select_models(self, select: Iterable[str]) -> None: - """Filter models like dbt's --select.""" + def _filter_evaluables(self, select: Iterable[str]) -> None: + """Filter evaluables like dbt's --select.""" single_model_select = re.compile(r"[a-zA-Z0-9_]+") if all(single_model_select.fullmatch(x) for x in select): @@ -431,4 +441,5 @@ def _select_models(self, select: Iterable[str]) -> None: # Use dbt's implementation of --select selected = dbt_ls(select) - self.models = [x for x in self.models if x.name in selected] + self.models = [m for m in self.models if m.name in selected] + self.sources = [s for s in self.sources if s.selector_name in selected] diff --git a/tests/test_models.py b/tests/test_models.py index ba94529..e4d7654 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -60,4 +60,4 @@ def test_manifest_no_model(mock_dbt_ls, mock_read_text, raw_manifest, caplog): manifest_loader = ManifestLoader(Path("some.json"), select=["non_existing"]) assert len(manifest_loader.models) == 0 - assert "No model found" in caplog.text + assert "Nothing to evaluate!" 
in caplog.text From 69c3df93da4dcd85882068e6a8ad63043be9c260 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 13:16:45 -0400 Subject: [PATCH 20/41] update _reindex_tests for sources --- src/dbt_score/models.py | 22 +++++++++++++++------ tests/resources/manifest.json | 37 +++++++++++++++++++++++++++++++++++ tests/test_models.py | 1 + 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 070dc44..9d38c70 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -8,6 +8,8 @@ from pathlib import Path from typing import Any, Iterable, Literal, TypeAlias +from more_itertools import first + from dbt_score.dbt_utils import dbt_ls logger = logging.getLogger(__name__) @@ -421,13 +423,21 @@ def _load_sources(self) -> None: self.sources.append(source) def _reindex_tests(self) -> None: - """Index tests based on their model id.""" + """Index tests based on their associated evaluable.""" for node_values in self.raw_nodes.values(): - # Only include tests that are attached to a model - if node_values.get("resource_type") == "test" and ( - attached_node := node_values.get("attached_node") - ): - self.tests[attached_node].append(node_values) + if node_values.get("resource_type") == "test": + # tests for models have a non-null value for `attached_node` + if attached_node := node_values.get("attached_node"): + self.tests[attached_node].append(node_values) + + # Tests for sources will have a null `attached_node`, and a non-empty list for `sources`. + # They need to be attributed to the source id based on the `depends_on` field. + elif node_values.get("sources") and ( + source_unique_id := first( + node_values.get("depends_on", {}).get("nodes", []), None + ) + ): + self.tests[source_unique_id].append(node_values) def _filter_evaluables(self, select: Iterable[str]) -> None: """Filter evaluables like dbt's --select.""" diff --git a/tests/resources/manifest.json b/tests/resources/manifest.json index 6e5664c..af5908b 100644 --- a/tests/resources/manifest.json +++ b/tests/resources/manifest.json @@ -124,6 +124,43 @@ "test.package.test3": { "resource_type": "test", "package_name": "package" + }, + "test.package.source_test1": { + "resource_type": "test", + "package_name": "package", + "name": "source_test1", + "attached_node": null, + "sources": [["my_source", "table1"]], + "depends_on": { + "nodes": ["source.package.my_source.table1"] + }, + "test_metadata": { + "name": "type", + "kwargs": {} + } + }, + "test.package.bad_source_test1": { + "resource_type": "test", + "package_name": "package", + "name": "source_test__malformed_missing_depends_on", + "attached_node": null, + "sources": [["my_source", "table1"]], + "test_metadata": { + "name": "type", + "kwargs": {} + } + }, + "test.package.bad_source_test2": { + "resource_type": "test", + "package_name": "package", + "name": "source_test__malformed_missing_depends_on_nodes", + "attached_node": null, + "sources": [["my_source", "table1"]], + "depends_on": {}, + "test_metadata": { + "name": "type", + "kwargs": {} + } } }, "sources": { diff --git a/tests/test_models.py b/tests/test_models.py index e4d7654..b0fac9d 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -29,6 +29,7 @@ def test_manifest_load(mock_read_text, raw_manifest): if source["package_name"] == raw_manifest["metadata"]["project_name"] ] ) + assert loader.sources[0].tests[0].name == "source_test1" @patch("dbt_score.models.Path.read_text") From fe1dd7f1ff66ea6a3f41b550f1844ea9d7951410 Mon Sep 17 
00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 13:21:25 -0400 Subject: [PATCH 21/41] update more model_filter renames in comments --- src/dbt_score/rule_filter.py | 2 +- tests/test_rule_registry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index c795759..ab47dc4 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -59,7 +59,7 @@ def __hash__(self) -> int: return hash(self.source()) -# Use @overload to have proper typing for both @model_filter and @model_filter(...) +# Use @overload to have proper typing for both @rule_filter and @rule_filter(...) # https://mypy.readthedocs.io/en/stable/generics.html#decorator-factories diff --git a/tests/test_rule_registry.py b/tests/test_rule_registry.py index 8898a94..343d98b 100644 --- a/tests/test_rule_registry.py +++ b/tests/test_rule_registry.py @@ -55,7 +55,7 @@ def test_rule_registry_core_rules(default_config): assert len(r.rules) > 0 -def test_rule_registry_model_filters(valid_config_path, model1, model2): +def test_rule_registry_rule_filters(valid_config_path, model1, model2): """Test config filters are loaded.""" config = Config() config._load_toml_file(str(valid_config_path)) From e35176bb8ec0ff484f0ed317c65e4922be46eb60 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 21:58:49 -0400 Subject: [PATCH 22/41] rename config to fail_any_item_under --- src/dbt_score/cli.py | 12 ++++++------ src/dbt_score/config.py | 4 ++-- src/dbt_score/formatters/human_readable_formatter.py | 6 +++--- src/dbt_score/formatters/json_formatter.py | 2 +- tests/formatters/test_human_readable_formatter.py | 2 +- tests/resources/pyproject.toml | 2 +- tests/test_cli.py | 4 ++-- tests/test_config.py | 2 +- 8 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index 5d3bbd3..1dd3796 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -88,8 +88,8 @@ def cli() -> None: default=None, ) @click.option( - "--fail_any_evaluable_under", - help="Fail if any evaluable is under this value.", + "--fail-any-item-under", + help="Fail if any evaluable item is under this value.", type=float, is_flag=False, default=None, @@ -104,7 +104,7 @@ def lint( manifest: Path, run_dbt_parse: bool, fail_project_under: float, - fail_any_evaluable_under: float, + fail_any_item_under: float, ) -> None: """Lint dbt metadata.""" manifest_provided = ( @@ -122,8 +122,8 @@ def lint( config.overload({"disabled_rules": disabled_rule}) if fail_project_under: config.overload({"fail_project_under": fail_project_under}) - if fail_any_evaluable_under: - config.overload({"fail_any_evaluable_under": fail_any_evaluable_under}) + if fail_any_item_under: + config.overload({"fail_any_item_under": fail_any_item_under}) try: if run_dbt_parse: @@ -149,7 +149,7 @@ def lint( if ( any( - x.value < config.fail_any_evaluable_under + x.value < config.fail_any_item_under for x in evaluation.scores.values() ) or evaluation.project_score.value < config.fail_project_under diff --git a/src/dbt_score/config.py b/src/dbt_score/config.py index fd46e91..a3e0b2a 100644 --- a/src/dbt_score/config.py +++ b/src/dbt_score/config.py @@ -56,7 +56,7 @@ class Config: "disabled_rules", "inject_cwd_in_python_path", "fail_project_under", - "fail_any_evaluable_under", + "fail_any_item_under", ] _rules_section: Final[str] = "rules" _badges_section: Final[str] = "badges" @@ -70,7 +70,7 @@ def __init__(self) -> None: self.config_file: Path | None = None 
self.badge_config: BadgeConfig = BadgeConfig() self.fail_project_under: float = 5.0 - self.fail_any_evaluable_under: float = 5.0 + self.fail_any_item_under: float = 5.0 def set_option(self, option: str, value: Any) -> None: """Set an option in the config.""" diff --git a/src/dbt_score/formatters/human_readable_formatter.py b/src/dbt_score/formatters/human_readable_formatter.py index 7d182a0..4454965 100644 --- a/src/dbt_score/formatters/human_readable_formatter.py +++ b/src/dbt_score/formatters/human_readable_formatter.py @@ -31,7 +31,7 @@ def evaluable_evaluated( self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: """Callback when an evaluable item has been evaluated.""" - if score.value < self._config.fail_any_evaluable_under: + if score.value < self._config.fail_any_item_under: self._failed_evaluables.append((evaluable, score)) print( f"{score.badge} {self.bold(evaluable.name)} (score: {score.rounded_value!s})" @@ -55,8 +55,8 @@ def project_evaluated(self, score: Score) -> None: if len(self._failed_evaluables) > 0: print() print( - f"Error: evaluable score too low, fail_any_evaluable_under = " - f"{self._config.fail_any_evaluable_under}" + f"Error: evaluable score too low, fail_any_item_under = " + f"{self._config.fail_any_item_under}" ) for evaluable, evaluable_score in self._failed_evaluables: print(f"Model {evaluable.name} scored {evaluable_score.value}") diff --git a/src/dbt_score/formatters/json_formatter.py b/src/dbt_score/formatters/json_formatter.py index 2648de6..4c32f98 100644 --- a/src/dbt_score/formatters/json_formatter.py +++ b/src/dbt_score/formatters/json_formatter.py @@ -70,7 +70,7 @@ def evaluable_evaluated( self._model_results[evaluable.name] = { "score": score.value, "badge": score.badge, - "pass": score.value >= self._config.fail_any_evaluable_under, + "pass": score.value >= self._config.fail_any_item_under, "results": {}, } for rule, result in results.items(): diff --git a/tests/formatters/test_human_readable_formatter.py b/tests/formatters/test_human_readable_formatter.py index 1ff26e2..a8f26e3 100644 --- a/tests/formatters/test_human_readable_formatter.py +++ b/tests/formatters/test_human_readable_formatter.py @@ -115,7 +115,7 @@ def test_human_readable_formatter_low_model_score( Project score: \x1B[1m0.0\x1B[0m 🚧 -Error: evaluable score too low, fail_any_evaluable_under = 5.0 +Error: evaluable score too low, fail_any_item_under = 5.0 Model model1 scored 0.0 """ ) diff --git a/tests/resources/pyproject.toml b/tests/resources/pyproject.toml index b8edcba..14144b3 100644 --- a/tests/resources/pyproject.toml +++ b/tests/resources/pyproject.toml @@ -2,7 +2,7 @@ rule_namespaces = ["foo", "tests"] disabled_rules = ["foo.foo", "tests.bar"] fail_project_under = 7.5 -fail_any_evaluable_under = 6.9 +fail_any_item_under = 6.9 [tool.dbt-score.badges] wip.icon = "🏗️" diff --git a/tests/test_cli.py b/tests/test_cli.py index 5229108..a0f5308 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -110,12 +110,12 @@ def test_fail_any_model_under(manifest_path): with patch("dbt_score.cli.Config._load_toml_file"): runner = CliRunner() result = runner.invoke( - lint, ["--manifest", manifest_path, "--fail_any_evaluable_under", "10.0"] + lint, ["--manifest", manifest_path, "--fail-any-item-under", "10.0"] ) assert "model1" in result.output assert "model2" in result.output assert ( - "Error: evaluable score too low, fail_any_evaluable_under" in result.stdout + "Error: evaluable score too low, fail_any_item_under" in result.stdout ) assert 
result.exit_code == 1 diff --git a/tests/test_config.py b/tests/test_config.py index f45086b..67a0e04 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -26,7 +26,7 @@ def test_load_valid_toml_file(valid_config_path): assert config.badge_config.second.icon == "2️⃣" assert config.badge_config.first.icon == "1️⃣" assert config.fail_project_under == 7.5 - assert config.fail_any_evaluable_under == 6.9 + assert config.fail_any_item_under == 6.9 assert config.rules_config[ "tests.rules.example.rule_test_example" ].rule_filter_names == ["tests.rules.example.skip_model1"] From beac2e4bc72e98e8587bae12e9ada6e5d7e80f47 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 22:56:45 -0400 Subject: [PATCH 23/41] update human_readable_formatter --- .../formatters/human_readable_formatter.py | 27 +++++-- .../test_human_readable_formatter.py | 75 ++++++++++--------- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/dbt_score/formatters/human_readable_formatter.py b/src/dbt_score/formatters/human_readable_formatter.py index 4454965..d8580a1 100644 --- a/src/dbt_score/formatters/human_readable_formatter.py +++ b/src/dbt_score/formatters/human_readable_formatter.py @@ -4,7 +4,7 @@ from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters import Formatter -from dbt_score.models import Evaluable +from dbt_score.models import Evaluable, Model, Source from dbt_score.rule import RuleViolation from dbt_score.scoring import Score @@ -27,15 +27,29 @@ def bold(text: str) -> str: """Return text in bold.""" return f"\033[1m{text}\033[0m" + @staticmethod + def pretty_name(evaluable: Evaluable) -> str: + """Return the pretty name for an evaluable.""" + match evaluable: + case Model(): + return evaluable.name + case Source(): + return evaluable.selector_name + case _: + raise NotImplementedError + def evaluable_evaluated( self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: """Callback when an evaluable item has been evaluated.""" if score.value < self._config.fail_any_item_under: self._failed_evaluables.append((evaluable, score)) - print( - f"{score.badge} {self.bold(evaluable.name)} (score: {score.rounded_value!s})" - ) + + resource_type = type(evaluable).__name__ + name_formatted = f"{resource_type[0]}: {self.pretty_name(evaluable)}" + header = f"{score.badge} {self.bold(name_formatted)} (score: {score.rounded_value!s})" + + print(header) for rule, result in results.items(): if result is None: print(f"{self.indent}{self.label_ok} {rule.source()}") @@ -59,7 +73,10 @@ def project_evaluated(self, score: Score) -> None: f"{self._config.fail_any_item_under}" ) for evaluable, evaluable_score in self._failed_evaluables: - print(f"Model {evaluable.name} scored {evaluable_score.value}") + resource_type = type(evaluable) + print( + f"{resource_type.__name__} {self.pretty_name(evaluable)} scored {evaluable_score.value}" + ) elif score.value < self._config.fail_project_under: print() diff --git a/tests/formatters/test_human_readable_formatter.py b/tests/formatters/test_human_readable_formatter.py index a8f26e3..e6cd631 100644 --- a/tests/formatters/test_human_readable_formatter.py +++ b/tests/formatters/test_human_readable_formatter.py @@ -1,4 +1,5 @@ """Unit tests for the human readable formatter.""" +from textwrap import dedent from dbt_score.evaluation import EvaluableResultsType from dbt_score.formatters.human_readable_formatter import HumanReadableFormatter @@ -26,14 +27,14 @@ def test_human_readable_formatter_model( } 
formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) stdout = capsys.readouterr().out - assert ( - stdout - == """🥇 \x1B[1mmodel1\x1B[0m (score: 10.0) - \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low - \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - -""" + assert stdout == dedent( + """\ + 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) + \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low + \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + """ ) @@ -67,14 +68,14 @@ def test_human_readable_formatter_near_perfect_model_score( } formatter.evaluable_evaluated(model1, results, Score(9.99, "🥈")) stdout = capsys.readouterr().out - assert ( - stdout - == """🥈 \x1B[1mmodel1\x1B[0m (score: 9.9) - \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low - \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - -""" + assert stdout == dedent( + """\ + 🥈 \x1B[1mM: model1\x1B[0m (score: 9.9) + \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low + \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + """ ) @@ -90,11 +91,12 @@ def test_human_readable_formatter_near_perfect_project_score( assert stdout == "Project score: \x1B[1m9.9\x1B[0m 🥈\n" -def test_human_readable_formatter_low_model_score( +def test_human_readable_formatter_low_evaluable_score( capsys, default_config, manifest_loader, model1, + source1, rule_severity_critical, ): """Ensure the formatter has the correct output when a model has a low score.""" @@ -105,19 +107,24 @@ def test_human_readable_formatter_low_model_score( rule_severity_critical: RuleViolation("Error"), } formatter.evaluable_evaluated(model1, results, Score(0.0, "🚧")) + formatter.evaluable_evaluated(source1, results, Score(0.0, "🚧")) formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out - print() - assert ( - stdout - == """🚧 \x1B[1mmodel1\x1B[0m (score: 0.0) - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error -Project score: \x1B[1m0.0\x1B[0m 🚧 + assert stdout == dedent( + """\ + 🚧 \x1B[1mM: model1\x1B[0m (score: 0.0) + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error -Error: evaluable score too low, fail_any_item_under = 5.0 -Model model1 scored 0.0 -""" + 🚧 \x1B[1mS: my_source.table1\x1B[0m (score: 0.0) + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + Project score: \x1B[1m0.0\x1B[0m 🚧 + + Error: evaluable score too low, fail_any_item_under = 5.0 + Model model1 scored 0.0 + Source my_source.table1 scored 0.0 + """ ) @@ -138,14 +145,14 @@ def test_human_readable_formatter_low_project_score( formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out - print() - assert ( - stdout - == """🥇 \x1B[1mmodel1\x1B[0m (score: 10.0) - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error -Project score: \x1B[1m0.0\x1B[0m 🚧 + assert stdout == dedent( + """\ + 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + Project score: \x1B[1m0.0\x1B[0m 🚧 -Error: project score too low, fail_project_under = 5.0 
-""" + Error: project score too low, fail_project_under = 5.0 + """ ) From 95da946b75f282b6636848ad85129e5a9d22ffc1 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 22:56:53 -0400 Subject: [PATCH 24/41] fmt --- src/dbt_score/cli.py | 5 +---- tests/test_cli.py | 4 +--- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index 1dd3796..b468074 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -148,10 +148,7 @@ def lint( ctx.exit(2) if ( - any( - x.value < config.fail_any_item_under - for x in evaluation.scores.values() - ) + any(x.value < config.fail_any_item_under for x in evaluation.scores.values()) or evaluation.project_score.value < config.fail_project_under ): ctx.exit(1) diff --git a/tests/test_cli.py b/tests/test_cli.py index a0f5308..35979ac 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -115,7 +115,5 @@ def test_fail_any_model_under(manifest_path): assert "model1" in result.output assert "model2" in result.output - assert ( - "Error: evaluable score too low, fail_any_item_under" in result.stdout - ) + assert "Error: evaluable score too low, fail_any_item_under" in result.stdout assert result.exit_code == 1 From e095ffc97934ec1aa18e98589044296457a256e5 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 23:10:25 -0400 Subject: [PATCH 25/41] move check for resource_type match to `should_evaluate` method --- src/dbt_score/evaluation.py | 4 +--- src/dbt_score/rule.py | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/dbt_score/evaluation.py b/src/dbt_score/evaluation.py index 6ff227b..98a7081 100644 --- a/src/dbt_score/evaluation.py +++ b/src/dbt_score/evaluation.py @@ -60,9 +60,7 @@ def evaluate(self) -> None: self.results[evaluable] = {} for rule in rules: try: - if rule.resource_type is type(evaluable) and rule.should_evaluate( - evaluable - ): # Consider rule filter(s). + if rule.should_evaluate(evaluable): result = rule.evaluate(evaluable, **rule.config) self.results[evaluable][rule.__class__] = result except Exception as e: diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index b1e5a99..60b0a84 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -125,10 +125,20 @@ def evaluate(self, evaluable: Evaluable) -> RuleViolation | None: @classmethod def should_evaluate(cls, evaluable: Evaluable) -> bool: - """Checks if all filters in the rule allow evaluation.""" + """Checks whether the rule should be applied against the evaluable. 
+ + The evaluable must satisfy the following criteria: + - all filters in the rule allow evaluation + - the rule and evaluable have matching resource_types + """ + resource_types_match = cls.resource_type is type(evaluable) + if cls.rule_filters: - return all(f.evaluate(evaluable) for f in cls.rule_filters) - return True + return ( + all(f.evaluate(evaluable) for f in cls.rule_filters) + and resource_types_match + ) + return resource_types_match @classmethod def set_severity(cls, severity: Severity) -> None: From 836bb6800bca8cbd2e82f2a3eaee043318a8591f Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 23:56:13 -0400 Subject: [PATCH 26/41] update docs --- docs/configuration.md | 6 ++-- docs/create_rules.md | 70 +++++++++++++++++++++++++++++++++---------- docs/index.md | 8 ++--- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 008dd17..1293bdd 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -18,7 +18,7 @@ rule_namespaces = ["dbt_score.rules", "dbt_score_rules", "custom_rules"] disabled_rules = ["dbt_score.rules.generic.columns_have_description"] inject_cwd_in_python_path = true fail_project_under = 7.5 -fail_any_model_under = 8.0 +fail_any_item_under = 8.0 [tool.dbt-score.badges] first.threshold = 10.0 @@ -51,7 +51,7 @@ The following options can be set in the `pyproject.toml` file: - `disabled_rules`: A list of rules to disable. - `fail_project_under` (default: `5.0`): If the project score is below this value the command will fail with return code 1. -- `fail_any_model_under` (default: `5.0`): If any model scores below this value +- `fail_any_item_under` (default: `5.0`): If any model or source scores below this value the command will fail with return code 1. #### Badges configuration @@ -86,7 +86,7 @@ Every rule can be configured with the following option: - `severity`: The severity of the rule. Rules have a default severity and can be overridden. It's an integer with a minimum value of 1 and a maximum value of 4. -- `model_filter_names`: Filters used by the rule. Takes a list of names that can +- `rule_filter_names`: Filters used by the rule. Takes a list of names that can be found in the same namespace as the rules (see [Package rules](package_rules.md)). diff --git a/docs/create_rules.md b/docs/create_rules.md index cd5e5b4..53de042 100644 --- a/docs/create_rules.md +++ b/docs/create_rules.md @@ -1,9 +1,9 @@ # Create rules -In order to lint and score models, `dbt-score` uses a set of rules that are -applied to each model. A rule can pass or fail when it is run. Based on the -severity of the rule, models are scored with the weighted average of the rules -results. Note that `dbt-score` comes bundled with a +In order to lint and score models or sources, `dbt-score` uses a set of +rules that are applied to each item. A rule can pass or fail when it is run. +Based on the severity of the rule, items are scored with the weighted +average of the rules results. Note that `dbt-score` comes bundled with a [set of default rules](rules/generic.md). On top of the generic rules, it's possible to add your own rules. 
Two ways exist @@ -21,7 +21,7 @@ The `@rule` decorator can be used to easily create a new rule: from dbt_score import Model, rule, RuleViolation @rule -def has_description(model: Model) -> RuleViolation | None: +def model_has_description(model: Model) -> RuleViolation | None: """A model should have a description.""" if not model.description: return RuleViolation(message="Model lacks a description.") @@ -31,6 +31,20 @@ The name of the function is the name of the rule and the docstring of the function is its description. Therefore, it is important to use a self-explanatory name for the function and document it well. +The type annotation for the rule's argument dictates whether the rule should +be applied to dbt models or sources. + +Here is the same example rule, applied to sources: +```python +from dbt_score import rule, RuleViolation, Source + +@rule +def source_has_description(source: Source) -> RuleViolation | None: + """A source should have a description.""" + if not source.description: + return RuleViolation(message="Source lacks a description.") +``` + The severity of a rule can be set using the `severity` argument: ```python @@ -45,15 +59,23 @@ For more advanced use cases, a rule can be created by inheriting from the `Rule` class: ```python -from dbt_score import Model, Rule, RuleViolation +from dbt_score import Model, Rule, RuleViolation, Source -class HasDescription(Rule): +class ModelHasDescription(Rule): description = "A model should have a description." def evaluate(self, model: Model) -> RuleViolation | None: """Evaluate the rule.""" if not model.description: return RuleViolation(message="Model lacks a description.") + +class SourceHasDescription(Rule): + description = "A source should have a description." + + def evaluate(self, source: Source) -> RuleViolation | None: + """Evaluate the rule.""" + if not source.description: + return RuleViolation(message="Source lacks a description.") ``` ### Rules location @@ -91,30 +113,48 @@ def sql_has_reasonable_number_of_lines(model: Model, max_lines: int = 200) -> Ru ) ``` -### Filtering models +### Filtering rules -Custom and standard rules can be configured to have model filters. Filters allow -models to be ignored by one or multiple rules. +Custom and standard rules can be configured to have filters. Filters allow +models or sources to be ignored by one or multiple rules if the item doesn't satisfy +the filter criteria. Filters are created using the same discovery mechanism and interface as custom rules, except they do not accept parameters. Similar to Python's built-in -`filter` function, when the filter evaluation returns `True` the model should be +`filter` function, when the filter evaluation returns `True` the item should be evaluated, otherwise it should be ignored. ```python -from dbt_score import ModelFilter, model_filter +from dbt_score import Model, RuleFilter, rule_filter -@model_filter +@rule_filter def only_schema_x(model: Model) -> bool: """Only applies a rule to schema X.""" return model.schema.lower() == 'x' -class SkipSchemaY(ModelFilter): +class SkipSchemaY(RuleFilter): description = "Applies a rule to every schema but Y." 
def evaluate(self, model: Model) -> bool: return model.schema.lower() != 'y' ``` +Filters also rely on type-annotations to dictate whether they apply to models or sources: + +```python +from dbt_score import RuleFilter, rule_filter, Source + +@rule_filter +def only_from_source_a(source: Source) -> bool: + """Only applies a rule to source tables from source X.""" + return source.source_name.lower() == 'a' + +class SkipSourceDatabaseB(RuleFilter): + description = "Applies a rule to every source except Database B." + def evaluate(self, source: Source) -> bool: + return source.database.lower() != 'b' +``` + + Similar to setting a rule severity, standard rules can have filters set in the [configuration file](configuration.md/#tooldbt-scorerulesrule_namespacerule_name), while custom rules accept the configuration file or a decorator parameter. @@ -123,7 +163,7 @@ while custom rules accept the configuration file or a decorator parameter. from dbt_score import Model, rule, RuleViolation from my_project import only_schema_x -@rule(model_filters={only_schema_x()}) +@rule(rule_filters={only_schema_x()}) def models_in_x_follow_naming_standard(model: Model) -> RuleViolation | None: """Models in schema X must follow the naming standard.""" if some_regex_fails(model.name): diff --git a/docs/index.md b/docs/index.md index e74efc0..bcae4fb 100644 --- a/docs/index.md +++ b/docs/index.md @@ -12,7 +12,7 @@ encourage) good practices. ``` > dbt-score lint -🥇 customers (score: 10.0) +🥇 M: customers (score: 10.0) OK dbt_score.rules.generic.has_description OK dbt_score.rules.generic.has_owner OK dbt_score.rules.generic.sql_has_reasonable_number_of_lines @@ -32,10 +32,10 @@ data teams dealing with many models. To that end, `dbt-score` has 2 main features: -- It runs rules on models, and displays rule violations. Those can be used in +- It runs rules on dbt models and sources, and displays any rule violations. These can be used in interactive environments or in CI. -- Using those run results, it scores models, as to give them a measure of their - maturity. This score can help gamify model metadata improvements, and be +- Using those run results, it scores items, to ascribe them a measure of their + maturity. This score can help gamify metadata improvements/coverage, and be reflected in data catalogs. 
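To make the first of these features concrete, here is a hedged sketch of a custom rule scoped to a single source through the decorators documented above; the source name and the rule body are illustrative assumptions, not part of this change set.

```python
# Hedged sketch: a source rule combined with a source filter.
# "finance" and the rule logic are placeholders, not project code.
from dbt_score import RuleViolation, Source, rule, rule_filter


@rule_filter
def only_finance_source(source: Source) -> bool:
    """Only applies a rule to tables of the 'finance' source."""
    return source.source_name.lower() == "finance"


@rule(rule_filters={only_finance_source()})
def finance_source_tables_have_description(source: Source) -> RuleViolation | None:
    """Tables of the finance source should be documented."""
    if not source.description:
        return RuleViolation(message="Source table lacks a description.")
```

Note that the validation added in this change set requires a filter's resource type to match its rule's, so attaching a source filter like this one to a model rule raises a `TypeError` when the rule is defined.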
`dbt-score` aims to: From fea456bb876ec01875a37de3ea72ef448fcb0cfb Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Wed, 23 Oct 2024 23:58:44 -0400 Subject: [PATCH 27/41] rename test_model_filter -> test_rule_filter --- tests/{test_model_filter.py => test_rule_filter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_model_filter.py => test_rule_filter.py} (100%) diff --git a/tests/test_model_filter.py b/tests/test_rule_filter.py similarity index 100% rename from tests/test_model_filter.py rename to tests/test_rule_filter.py From ea7691ffaf393a935eee412a686edde65c310943 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 28 Oct 2024 18:49:58 -0400 Subject: [PATCH 28/41] add newline to pyproject.toml --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9949784..e4b2ba1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -117,3 +117,4 @@ fail_under = 80 exclude_also = [ "@overload" ] + From 1122273f0299a3255d8df85f3fa101828820c50b Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Mon, 28 Oct 2024 22:41:00 -0400 Subject: [PATCH 29/41] validate that filters match the resource type of the rules they attach to --- src/dbt_score/rule.py | 11 +++++++ tests/test_rule.py | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 60b0a84..57957d7 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -83,6 +83,17 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore cls.resource_type = cls._introspect_resource_type() + cls._validate_rule_filters() + + @classmethod + def _validate_rule_filters(cls): + for rule_filter in cls.rule_filters: + if rule_filter.resource_type != cls.resource_type: + raise TypeError( + f"Mismatched resource_type on filter {rule_filter.__class__.__name__}. " + f"Expected {cls.resource_type.__name__}, but got {rule_filter.resource_type.__name__}." 
+ ) + @classmethod def _introspect_resource_type(cls) -> Type[Evaluable]: evaluate_func = getattr(cls, "_orig_evaluate", cls.evaluate) diff --git a/tests/test_rule.py b/tests/test_rule.py index 467deee..76f9074 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -2,6 +2,9 @@ import pytest from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule +from dbt_score.rule_filter import rule_filter + +from src.dbt_score.rule_filter import RuleFilter def test_rule_decorator_and_class( @@ -81,3 +84,73 @@ def test_rule_introspects_its_resource_type(request, rule_fixture, expected_type """Test that each rule is aware of the resource-type that it can be evaluated against.""" rule = request.getfixturevalue(rule_fixture) assert rule().resource_type is expected_type + + +class TestRuleFilterValidation: + """Tests that a rule filter must have a matching resource-type to the rule it's attached to.""" + + @pytest.fixture + def source_filter_no_parens(self): + """Example source filter with bare decorator.""" + + @rule_filter + def source_filter(source: Source) -> bool: + """Description.""" + return False + + return source_filter() + + @pytest.fixture + def source_filter_parens(self): + """Example source filter with decorator and parens.""" + + @rule_filter() + def source_filter(source: Source) -> bool: + """Description.""" + return False + + return source_filter() + + @pytest.fixture + def source_filter_class(self): + """Example class-based source filter.""" + + class SourceFilter(RuleFilter): + description = "Description" + + def evaluate(self, source: Source) -> bool: + return False + + return SourceFilter + + @pytest.mark.parametrize( + "rule_filter_fixture", + ["source_filter_no_parens", "source_filter_parens", "source_filter_class"], + ) + def test_rule_filter_must_match_resource_type_as_rule( + self, request, rule_filter_fixture + ): + """Tests that rules cannot be initialized when filters are of the wrong resource-type.""" + rule_filter = request.getfixturevalue(rule_filter_fixture) + + with pytest.raises(TypeError) as excinfo: + + @rule(rule_filters={rule_filter}) + def model_always_passes(model: Model) -> RuleViolation | None: + """Description.""" + pass + + assert "Mismatched resource_type on filter" in str(excinfo.value) + assert "Expected Model, but got Source" in str(excinfo.value) + + with pytest.raises(TypeError): + + class ModelAlwaysPasses(Rule): + description = "Description." 
+ rule_filters = frozenset([rule_filter]) + + def evaluate(self, model: Model) -> RuleViolation | None: + pass + + assert "Mismatched resource_type on filter" in str(excinfo.value) + assert "Expected Model, but got Source" in str(excinfo.value) From eca0b1e3fb79fd437889a03ef4694ca7d089eddc Mon Sep 17 00:00:00 2001 From: Jochem van Dooren Date: Thu, 31 Oct 2024 11:31:27 +0100 Subject: [PATCH 30/41] Final renaming of models to include sources --- CHANGELOG.md | 5 +++++ README.md | 2 +- docs/configuration.md | 6 +++--- docs/create_rules.md | 23 +++++++++++----------- docs/get_started.md | 4 ++-- docs/index.md | 15 +++++++------- docs/programmatic_invocations.md | 6 +++--- src/dbt_score/cli.py | 2 +- src/dbt_score/formatters/json_formatter.py | 14 ++++++------- src/dbt_score/lint.py | 2 +- src/dbt_score/models.py | 4 ++-- tests/formatters/test_json_formatter.py | 2 +- tests/test_cli.py | 2 +- 13 files changed, 47 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 920e6d3..0e7270a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ and this project adheres to ## [Unreleased] +- **Breaking**: Support linting of sources. +- **Breaking**: `--fail_any_model_under` becomes `--fail-any-item-under` and + `--fail_project_under` becomes `--fail-project-under`. +- **Breaking**: `model_filter_names` becomes `rule_filter_names`. + ## [0.7.1] - 2024-11-01 - Fix mkdocs. diff --git a/README.md b/README.md index dcea0eb..50a0853 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ ## What is `dbt-score`? -`dbt-score` is a linter for dbt model metadata. +`dbt-score` is a linter for dbt metadata. [dbt][dbt] (Data Build Tool) is a great framework for creating, building, organizing, testing and documenting _data models_, i.e. data sets living in a diff --git a/docs/configuration.md b/docs/configuration.md index 1293bdd..a64df37 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -51,8 +51,8 @@ The following options can be set in the `pyproject.toml` file: - `disabled_rules`: A list of rules to disable. - `fail_project_under` (default: `5.0`): If the project score is below this value the command will fail with return code 1. -- `fail_any_item_under` (default: `5.0`): If any model or source scores below this value - the command will fail with return code 1. +- `fail_any_item_under` (default: `5.0`): If any model or source scores below + this value the command will fail with return code 1. #### Badges configuration @@ -70,7 +70,7 @@ All badges except `wip` can be configured with the following option: - `threshold`: The threshold for the badge. A decimal number between `0.0` and `10.0` that will be used to compare to the score. The threshold is the minimum - score required for a model to be rewarded with a certain badge. + score required for a model or source to be rewarded with a certain badge. The default values can be found in the [BadgeConfig](reference/config.md#dbt_score.config.BadgeConfig). diff --git a/docs/create_rules.md b/docs/create_rules.md index 53de042..dc6c6df 100644 --- a/docs/create_rules.md +++ b/docs/create_rules.md @@ -1,9 +1,9 @@ # Create rules -In order to lint and score models or sources, `dbt-score` uses a set of -rules that are applied to each item. A rule can pass or fail when it is run. -Based on the severity of the rule, items are scored with the weighted -average of the rules results. 
Note that `dbt-score` comes bundled with a +In order to lint and score models or sources, `dbt-score` uses a set of rules +that are applied to each item. A rule can pass or fail when it is run. Based on +the severity of the rule, items are scored with the weighted average of the +rules results. Note that `dbt-score` comes bundled with a [set of default rules](rules/generic.md). On top of the generic rules, it's possible to add your own rules. Two ways exist @@ -31,10 +31,11 @@ The name of the function is the name of the rule and the docstring of the function is its description. Therefore, it is important to use a self-explanatory name for the function and document it well. -The type annotation for the rule's argument dictates whether the rule should -be applied to dbt models or sources. +The type annotation for the rule's argument dictates whether the rule should be +applied to dbt models or sources. Here is the same example rule, applied to sources: + ```python from dbt_score import rule, RuleViolation, Source @@ -68,7 +69,7 @@ class ModelHasDescription(Rule): """Evaluate the rule.""" if not model.description: return RuleViolation(message="Model lacks a description.") - + class SourceHasDescription(Rule): description = "A source should have a description." @@ -116,8 +117,8 @@ def sql_has_reasonable_number_of_lines(model: Model, max_lines: int = 200) -> Ru ### Filtering rules Custom and standard rules can be configured to have filters. Filters allow -models or sources to be ignored by one or multiple rules if the item doesn't satisfy -the filter criteria. +models or sources to be ignored by one or multiple rules if the item doesn't +satisfy the filter criteria. Filters are created using the same discovery mechanism and interface as custom rules, except they do not accept parameters. Similar to Python's built-in @@ -138,7 +139,8 @@ class SkipSchemaY(RuleFilter): return model.schema.lower() != 'y' ``` -Filters also rely on type-annotations to dictate whether they apply to models or sources: +Filters also rely on type-annotations to dictate whether they apply to models or +sources: ```python from dbt_score import RuleFilter, rule_filter, Source @@ -154,7 +156,6 @@ class SkipSourceDatabaseB(RuleFilter): return source.database.lower() != 'b' ``` - Similar to setting a rule severity, standard rules can have filters set in the [configuration file](configuration.md/#tooldbt-scorerulesrule_namespacerule_name), while custom rules accept the configuration file or a decorator parameter. diff --git a/docs/get_started.md b/docs/get_started.md index 9c00b9f..d96d585 100644 --- a/docs/get_started.md +++ b/docs/get_started.md @@ -40,8 +40,8 @@ It's also possible to automatically run `dbt parse`, to generate the dbt-score lint --run-dbt-parse ``` -To lint only a selection of models, the argument `--select` can be used. It -accepts any +To lint only a selection of models or sources, the argument `--select` can be +used. It accepts any [dbt node selection syntax](https://docs.getdbt.com/reference/node-selection/syntax): ```shell diff --git a/docs/index.md b/docs/index.md index bcae4fb..c100708 100644 --- a/docs/index.md +++ b/docs/index.md @@ -2,8 +2,9 @@ `dbt-score` is a linter for [dbt](https://www.getdbt.com/) metadata. -dbt allows data practitioners to organize their data in to _models_. Those -models have metadata associated with them: documentation, tests, types, etc. +dbt allows data practitioners to organize their data in to _models_ and +_sources_. 
Those models and sources have metadata associated with them: +documentation, tests, types, etc. `dbt-score` allows to lint and score this metadata, in order to enforce (or encourage) good practices. @@ -25,15 +26,15 @@ score. ## Philosophy -dbt models are often used as metadata containers: either in YAML files or -through the use of `{{ config() }}` blocks, they are associated with a lot of +dbt models/sources are often used as metadata containers: either in YAML files +or through the use of `{{ config() }}` blocks, they are associated with a lot of information. At scale, it becomes tedious to enforce good practices in large -data teams dealing with many models. +data teams dealing with many models/sources. To that end, `dbt-score` has 2 main features: -- It runs rules on dbt models and sources, and displays any rule violations. These can be used in - interactive environments or in CI. +- It runs rules on dbt models and sources, and displays any rule violations. + These can be used in interactive environments or in CI. - Using those run results, it scores items, to ascribe them a measure of their maturity. This score can help gamify metadata improvements/coverage, and be reflected in data catalogs. diff --git a/docs/programmatic_invocations.md b/docs/programmatic_invocations.md index 8e2637c..5b95025 100644 --- a/docs/programmatic_invocations.md +++ b/docs/programmatic_invocations.md @@ -61,9 +61,9 @@ When `dbt-score` terminates, it exists with one of the following exit codes: project being linted either doesn't raise any warning, or the warnings are small enough to be above the thresholds. This generally means "successful linting". -- `1` in case of linting errors. This is the unhappy case: some models in the - project raise enough warnings to have a score below the defined thresholds. - This generally means "linting doesn't pass". +- `1` in case of linting errors. This is the unhappy case: some models or + sources in the project raise enough warnings to have a score below the defined + thresholds. This generally means "linting doesn't pass". - `2` in case of an unexpected error. This happens for example if something is misconfigured (for example a faulty dbt project), or the wrong parameters are given to the CLI. This generally means "setup needs to be fixed". 
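For illustration, a minimal sketch of consuming these exit codes from a wrapper script; the command line and manifest path are assumptions, not taken from this change set.

```python
# Hedged sketch: run `dbt-score lint` and branch on the exit codes
# described above. Paths and the CI behaviour are placeholders.
import subprocess
import sys

result = subprocess.run(
    ["dbt-score", "lint", "--manifest", "target/manifest.json"],
    capture_output=True,
    text=True,
)
print(result.stdout)

if result.returncode == 0:
    print("Linting passed.")
elif result.returncode == 1:
    # Some models or sources scored below fail_any_item_under,
    # or the project scored below fail_project_under.
    sys.exit(1)
else:
    # Exit code 2: unexpected error, e.g. a faulty dbt project or bad
    # CLI parameters; surface stderr for debugging.
    print(result.stderr, file=sys.stderr)
    sys.exit(result.returncode)
```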
diff --git a/src/dbt_score/cli.py b/src/dbt_score/cli.py index b468074..9585d95 100644 --- a/src/dbt_score/cli.py +++ b/src/dbt_score/cli.py @@ -81,7 +81,7 @@ def cli() -> None: default=False, ) @click.option( - "--fail_project_under", + "--fail-project-under", help="Fail if the project score is under this value.", type=float, is_flag=False, diff --git a/src/dbt_score/formatters/json_formatter.py b/src/dbt_score/formatters/json_formatter.py index 4c32f98..263af49 100644 --- a/src/dbt_score/formatters/json_formatter.py +++ b/src/dbt_score/formatters/json_formatter.py @@ -4,7 +4,7 @@ ```json { - "models": { + "evaluables": { "model_foo": { "score": 5.0, "badge": "🥈", @@ -60,14 +60,14 @@ class JSONFormatter(Formatter): def __init__(self, *args: Any, **kwargs: Any): """Instantiate formatter.""" super().__init__(*args, **kwargs) - self._model_results: dict[str, dict[str, Any]] = {} + self.evaluable_results: dict[str, dict[str, Any]] = {} self._project_results: dict[str, Any] def evaluable_evaluated( self, evaluable: Evaluable, results: EvaluableResultsType, score: Score ) -> None: """Callback when an evaluable item has been evaluated.""" - self._model_results[evaluable.name] = { + self.evaluable_results[evaluable.name] = { "score": score.value, "badge": score.badge, "pass": score.value >= self._config.fail_any_item_under, @@ -76,19 +76,19 @@ def evaluable_evaluated( for rule, result in results.items(): severity = rule.severity.name.lower() if result is None: - self._model_results[evaluable.name]["results"][rule.source()] = { + self.evaluable_results[evaluable.name]["results"][rule.source()] = { "result": "OK", "severity": severity, "message": None, } elif isinstance(result, RuleViolation): - self._model_results[evaluable.name]["results"][rule.source()] = { + self.evaluable_results[evaluable.name]["results"][rule.source()] = { "result": "WARN", "severity": severity, "message": result.message, } else: - self._model_results[evaluable.name]["results"][rule.source()] = { + self.evaluable_results[evaluable.name]["results"][rule.source()] = { "result": "ERR", "severity": severity, "message": str(result), @@ -102,7 +102,7 @@ def project_evaluated(self, score: Score) -> None: "pass": score.value >= self._config.fail_project_under, } document = { - "models": self._model_results, + "evaluables": self.evaluable_results, "project": self._project_results, } print(json.dumps(document, indent=2, ensure_ascii=False)) diff --git a/src/dbt_score/lint.py b/src/dbt_score/lint.py index 53d09f0..9a4dfcb 100644 --- a/src/dbt_score/lint.py +++ b/src/dbt_score/lint.py @@ -1,4 +1,4 @@ -"""Lint dbt models metadata.""" +"""Lint dbt metadata.""" from pathlib import Path from typing import Iterable, Literal diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 9d38c70..83a2d89 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -44,7 +44,7 @@ def from_raw_values(cls, raw_values: dict[str, Any]) -> "Constraint": @dataclass class Test: - """Test for a column or model. + """Test for a column, model or source. Attributes: name: The name of the test. @@ -372,7 +372,7 @@ def __hash__(self) -> int: class ManifestLoader: - """Load the models and tests from the manifest.""" + """Load the models, sources and tests from the manifest.""" def __init__(self, file_path: Path, select: Iterable[str] | None = None): """Initialize the ManifestLoader. 
diff --git a/tests/formatters/test_json_formatter.py b/tests/formatters/test_json_formatter.py index aca8691..5c707da 100644 --- a/tests/formatters/test_json_formatter.py +++ b/tests/formatters/test_json_formatter.py @@ -29,7 +29,7 @@ def test_json_formatter( assert ( stdout == """{ - "models": { + "evaluables": { "model1": { "score": 10.0, "badge": "🥇", diff --git a/tests/test_cli.py b/tests/test_cli.py index 35979ac..91d2bae 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -96,7 +96,7 @@ def test_fail_project_under(manifest_path): with patch("dbt_score.cli.Config._load_toml_file"): runner = CliRunner() result = runner.invoke( - lint, ["--manifest", manifest_path, "--fail_project_under", "10.0"] + lint, ["--manifest", manifest_path, "--fail-project-under", "10.0"] ) assert "model1" in result.output From cc40c46adc3152b2aaa76ff76b4c38908b5a63ac Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Thu, 31 Oct 2024 10:25:33 -0400 Subject: [PATCH 31/41] fix line lengths --- .../formatters/human_readable_formatter.py | 8 +- src/dbt_score/models.py | 18 +++-- src/dbt_score/rule.py | 9 ++- src/dbt_score/rule_filter.py | 3 +- .../test_human_readable_formatter.py | 73 +++++++++---------- tests/test_evaluation.py | 2 +- tests/test_rule.py | 6 +- 7 files changed, 65 insertions(+), 54 deletions(-) diff --git a/src/dbt_score/formatters/human_readable_formatter.py b/src/dbt_score/formatters/human_readable_formatter.py index d8580a1..ed2db7a 100644 --- a/src/dbt_score/formatters/human_readable_formatter.py +++ b/src/dbt_score/formatters/human_readable_formatter.py @@ -47,7 +47,10 @@ def evaluable_evaluated( resource_type = type(evaluable).__name__ name_formatted = f"{resource_type[0]}: {self.pretty_name(evaluable)}" - header = f"{score.badge} {self.bold(name_formatted)} (score: {score.rounded_value!s})" + header = ( + f"{score.badge} " + f"{self.bold(name_formatted)} (score: {score.rounded_value!s})" + ) print(header) for rule, result in results.items(): @@ -75,7 +78,8 @@ def project_evaluated(self, score: Score) -> None: for evaluable, evaluable_score in self._failed_evaluables: resource_type = type(evaluable) print( - f"{resource_type.__name__} {self.pretty_name(evaluable)} scored {evaluable_score.value}" + f"{resource_type.__name__} " + f"{self.pretty_name(evaluable)} scored {evaluable_score.value}" ) elif score.value < self._config.fail_project_under: diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index 83a2d89..bc26b3b 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -260,9 +260,12 @@ class SourceFreshness: This is referred to as `FreshnessThreshold` in the dbt JSONSchema. Attributes: - warn_after: The threshold after which the dbt source freshness check should soft-fail with a warning. - error_after: The threshold after which the dbt source freshness check should fail. - filter: An optional filter to apply to the input data before running source freshness check. + warn_after: The threshold after which the dbt source freshness check should + soft-fail with a warning. + error_after: The threshold after which the dbt source freshness check should + fail. + filter: An optional filter to apply to the input data before running + source freshness check. """ warn_after: Duration @@ -275,7 +278,8 @@ class Source(HasColumnsMixin): """Represents a dbt source table. Attributes: - unique_id: The id of the source table, e.g. 'source.package.source_name.source_table_name'. + unique_id: The id of the source table, + e.g. 'source.package.source_name.source_table_name'. 
name: The alias of the source table. description: The full description of the source table. source_name: The source namespace. @@ -430,8 +434,10 @@ def _reindex_tests(self) -> None: if attached_node := node_values.get("attached_node"): self.tests[attached_node].append(node_values) - # Tests for sources will have a null `attached_node`, and a non-empty list for `sources`. - # They need to be attributed to the source id based on the `depends_on` field. + # Tests for sources will have a null `attached_node`, + # and a non-empty list for `sources`. + # They need to be attributed to the source id + # based on the `depends_on` field. elif node_values.get("sources") and ( source_unique_id := first( node_values.get("depends_on", {}).get("nodes", []), None diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 57957d7..130e00e 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -90,8 +90,10 @@ def _validate_rule_filters(cls): for rule_filter in cls.rule_filters: if rule_filter.resource_type != cls.resource_type: raise TypeError( - f"Mismatched resource_type on filter {rule_filter.__class__.__name__}. " - f"Expected {cls.resource_type.__name__}, but got {rule_filter.resource_type.__name__}." + f"Mismatched resource_type on filter " + f"{rule_filter.__class__.__name__}. " + f"Expected {cls.resource_type.__name__}, " + f"but got {rule_filter.resource_type.__name__}." ) @classmethod @@ -106,7 +108,8 @@ def _introspect_resource_type(cls) -> Type[Evaluable]: if not resource_type_argument: raise TypeError( - "Subclass must implement method `evaluate` with an annotated Model or Source argument." + "Subclass must implement method `evaluate` with an " + "annotated Model or Source argument." ) return resource_type_argument.annotation diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index ab47dc4..21adc35 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -40,7 +40,8 @@ def _introspect_resource_type(cls) -> Type[Evaluable]: if not resource_type_argument: raise TypeError( - "Subclass must implement method `evaluate` with an annotated Model or Source argument." + "Subclass must implement method `evaluate` with an " + "annotated Model or Source argument." 
) return resource_type_argument.annotation diff --git a/tests/formatters/test_human_readable_formatter.py b/tests/formatters/test_human_readable_formatter.py index e6cd631..0f1f90e 100644 --- a/tests/formatters/test_human_readable_formatter.py +++ b/tests/formatters/test_human_readable_formatter.py @@ -27,15 +27,14 @@ def test_human_readable_formatter_model( } formatter.evaluable_evaluated(model1, results, Score(10.0, "🥇")) stdout = capsys.readouterr().out - assert stdout == dedent( - """\ - 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) - \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low - \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - - """ - ) + expected = """\ + 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) + \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low + \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + """ + assert stdout == dedent(expected) def test_human_readable_formatter_project(capsys, default_config, manifest_loader): @@ -68,15 +67,15 @@ def test_human_readable_formatter_near_perfect_model_score( } formatter.evaluable_evaluated(model1, results, Score(9.99, "🥈")) stdout = capsys.readouterr().out - assert stdout == dedent( - """\ - 🥈 \x1B[1mM: model1\x1B[0m (score: 9.9) - \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low - \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - - """ - ) + + expected = """\ + 🥈 \x1B[1mM: model1\x1B[0m (score: 9.9) + \x1B[1;32mOK \x1B[0m tests.conftest.rule_severity_low + \x1B[1;31mERR \x1B[0m tests.conftest.rule_severity_medium: Oh noes + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + + """ + assert stdout == dedent(expected) def test_human_readable_formatter_near_perfect_project_score( @@ -111,21 +110,20 @@ def test_human_readable_formatter_low_evaluable_score( formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out - assert stdout == dedent( - """\ - 🚧 \x1B[1mM: model1\x1B[0m (score: 0.0) - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + expected = """\ + 🚧 \x1B[1mM: model1\x1B[0m (score: 0.0) + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - 🚧 \x1B[1mS: my_source.table1\x1B[0m (score: 0.0) - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + 🚧 \x1B[1mS: my_source.table1\x1B[0m (score: 0.0) + \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error - Project score: \x1B[1m0.0\x1B[0m 🚧 + Project score: \x1B[1m0.0\x1B[0m 🚧 - Error: evaluable score too low, fail_any_item_under = 5.0 - Model model1 scored 0.0 - Source my_source.table1 scored 0.0 - """ - ) + Error: evaluable score too low, fail_any_item_under = 5.0 + Model model1 scored 0.0 + Source my_source.table1 scored 0.0 + """ + assert stdout == dedent(expected) def test_human_readable_formatter_low_project_score( @@ -146,13 +144,12 @@ def test_human_readable_formatter_low_project_score( formatter.project_evaluated(Score(0.0, "🚧")) stdout = capsys.readouterr().out - assert stdout == dedent( - """\ - 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) - \x1B[1;33mWARN\x1B[0m (critical) tests.conftest.rule_severity_critical: Error + expected = """\ + 🥇 \x1B[1mM: model1\x1B[0m (score: 10.0) + \x1B[1;33mWARN\x1B[0m (critical) 
tests.conftest.rule_severity_critical: Error - Project score: \x1B[1m0.0\x1B[0m 🚧 + Project score: \x1B[1m0.0\x1B[0m 🚧 - Error: project score too low, fail_project_under = 5.0 - """ - ) + Error: project score too low, fail_project_under = 5.0 + """ + assert stdout == dedent(expected) diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 1633b08..73573f4 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -263,7 +263,7 @@ def test_evaluation_with_class_filter( def test_evaluation_with_models_and_sources( manifest_path, default_config, decorator_rule, decorator_rule_source ): - """Test that model rules are applied to models and source rules are applied to sources.""" + """Test that model rules apply to models and source rules apply to sources.""" manifest_loader = ManifestLoader(manifest_path) mock_formatter = Mock() mock_scorer = Mock() diff --git a/tests/test_rule.py b/tests/test_rule.py index 76f9074..3413435 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -81,13 +81,13 @@ class BadRule(Rule): ], ) def test_rule_introspects_its_resource_type(request, rule_fixture, expected_type): - """Test that each rule is aware of the resource-type that it can be evaluated against.""" + """Test that each rule is aware of the resource-type it is evaluated against.""" rule = request.getfixturevalue(rule_fixture) assert rule().resource_type is expected_type class TestRuleFilterValidation: - """Tests that a rule filter must have a matching resource-type to the rule it's attached to.""" + """Tests that a rule filter matches resource-type to the rule it's attached to.""" @pytest.fixture def source_filter_no_parens(self): @@ -130,7 +130,7 @@ def evaluate(self, source: Source) -> bool: def test_rule_filter_must_match_resource_type_as_rule( self, request, rule_filter_fixture ): - """Tests that rules cannot be initialized when filters are of the wrong resource-type.""" + """Tests that rules can't be created with filters of incorrect resource-type.""" rule_filter = request.getfixturevalue(rule_filter_fixture) with pytest.raises(TypeError) as excinfo: From a5b2ac1be4eaf40c1850ab0948e45d1d1ae54190 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Thu, 31 Oct 2024 10:34:38 -0400 Subject: [PATCH 32/41] update changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e7270a..eb38473 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,15 @@ and this project adheres to ## [Unreleased] - **Breaking**: Support linting of sources. -- **Breaking**: `--fail_any_model_under` becomes `--fail-any-item-under` and +- **Breaking**: Renamed modules: + `dbt_score.model_filter` becomes `dbt_score.rule_filter` +- **Breaking**: Renamed filter class and decorator: + `@model_filter` becomes `@rule_filter` and + `ModelFilter` becomes `RuleFilter`. +- **Breaking**: Config option `model_filter_names` becomes `rule_filter_names`. +- **Breaking**: CLI flag naming fixes: + `--fail_any_model_under` becomes `--fail-any-item-under` and `--fail_project_under` becomes `--fail-project-under`. -- **Breaking**: `model_filter_names` becomes `rule_filter_names`. 
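As a hedged illustration of what these renames mean for an existing custom filter (the filter shown is the schema-X example from the documentation, repeated here only for the before/after contrast):

```python
# Before this release (old names):
#
#   from dbt_score import Model, model_filter
#
#   @model_filter
#   def only_schema_x(model: Model) -> bool:
#       """Only applies a rule to schema X."""
#       return model.schema.lower() == "x"
#
# After this release, the same filter uses the renamed decorator:
from dbt_score import Model, rule_filter


@rule_filter
def only_schema_x(model: Model) -> bool:
    """Only applies a rule to schema X."""
    return model.schema.lower() == "x"
```

Configuration follows the same pattern: `model_filter_names` becomes `rule_filter_names` in `pyproject.toml`, and the CLI flags move to the dashed forms listed above.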
## [0.7.1] - 2024-11-01 From 8ca822f6084da143af6e0f4a1d7e3f3109124141 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Thu, 31 Oct 2024 11:57:17 -0400 Subject: [PATCH 33/41] remove hard dep on more-itertools by vendoring first_true --- src/dbt_score/models.py | 6 ++---- src/dbt_score/rule.py | 3 +-- src/dbt_score/rule_filter.py | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/dbt_score/models.py b/src/dbt_score/models.py index bc26b3b..79a76b1 100644 --- a/src/dbt_score/models.py +++ b/src/dbt_score/models.py @@ -8,8 +8,6 @@ from pathlib import Path from typing import Any, Iterable, Literal, TypeAlias -from more_itertools import first - from dbt_score.dbt_utils import dbt_ls logger = logging.getLogger(__name__) @@ -439,8 +437,8 @@ def _reindex_tests(self) -> None: # They need to be attributed to the source id # based on the `depends_on` field. elif node_values.get("sources") and ( - source_unique_id := first( - node_values.get("depends_on", {}).get("nodes", []), None + source_unique_id := next( + iter(node_values.get("depends_on", {}).get("nodes", [])), None ) ): self.tests[source_unique_id].append(node_values) diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index 130e00e..d05e868 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -6,9 +6,8 @@ from enum import Enum from typing import Any, Callable, Iterable, Type, TypeAlias, overload -from more_itertools import first_true - from dbt_score.models import Evaluable +from dbt_score.more_itertools import first_true from dbt_score.rule_filter import RuleFilter diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index 21adc35..3a5c01b 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -3,9 +3,8 @@ import typing from typing import Any, Callable, Type, TypeAlias, overload -from more_itertools import first_true - from dbt_score.models import Evaluable +from dbt_score.more_itertools import first_true FilterEvaluationType: TypeAlias = Callable[[Evaluable], bool] From 30900c4153a1ac2d60aafa793f6e8556648710bc Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Thu, 31 Oct 2024 16:13:47 -0400 Subject: [PATCH 34/41] actually commit more_itertools replacement --- src/dbt_score/more_itertools.py | 20 ++++++++++++++++++++ tests/test_more_itertools.py | 22 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 src/dbt_score/more_itertools.py create mode 100644 tests/test_more_itertools.py diff --git a/src/dbt_score/more_itertools.py b/src/dbt_score/more_itertools.py new file mode 100644 index 0000000..f91ff6c --- /dev/null +++ b/src/dbt_score/more_itertools.py @@ -0,0 +1,20 @@ +"""Vendored utility functions from https://github.com/more-itertools/more-itertools.""" + + +def first_true(iterable, default=None, pred=None): + """Returns the first true value in the iterable. + + If no true value is found, returns *default* + + If *pred* is not None, returns the first item for which + ``pred(item) == True`` . 
+ + >>> first_true(range(10)) + 1 + >>> first_true(range(10), pred=lambda x: x > 5) + 6 + >>> first_true(range(10), default='missing', pred=lambda x: x > 9) + 'missing' + + """ + return next(filter(pred, iterable), default) diff --git a/tests/test_more_itertools.py b/tests/test_more_itertools.py new file mode 100644 index 0000000..2f29036 --- /dev/null +++ b/tests/test_more_itertools.py @@ -0,0 +1,22 @@ +"""Tests for vendored more_itertools functions.""" +from dbt_score.more_itertools import first_true + + +class TestFirstTrue: + """Tests for `first_true`.""" + + def test_something_true(self): + """Test with no keyword arguments.""" + assert first_true(range(10), 1) + + def test_nothing_true(self): + """Test default return value.""" + assert first_true([0, 0, 0]) is None + + def test_default(self): + """Test with a default keyword.""" + assert first_true([0, 0, 0], default="!") == "!" + + def test_pred(self): + """Test with a custom predicate.""" + assert first_true([2, 4, 6], pred=lambda x: x % 3 == 0) == 6 From 6204bc4c8d37ef4158e8d41312cf705b3568eeaa Mon Sep 17 00:00:00 2001 From: Oliver Tosky <42260747+otosky@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:16:47 -0400 Subject: [PATCH 35/41] add newline Co-authored-by: Matthieu Caneill --- src/dbt_score/rule_filter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index 3a5c01b..dd4964e 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -1,4 +1,5 @@ """Evaluable filtering to choose when to apply specific rules.""" + import inspect import typing from typing import Any, Callable, Type, TypeAlias, overload From 7ecc1b2a3711a61c3cb147751d72b60aa3f5ee46 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Fri, 1 Nov 2024 11:18:54 -0400 Subject: [PATCH 36/41] remove breaking notice --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb38473..6622a94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ and this project adheres to ## [Unreleased] -- **Breaking**: Support linting of sources. +- Support linting of sources. 
- **Breaking**: Renamed modules: `dbt_score.model_filter` becomes `dbt_score.rule_filter` - **Breaking**: Renamed filter class and decorator: From cc6b70762f4ebb523bc3598bf7e02ff3d910542a Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Fri, 1 Nov 2024 12:04:59 -0400 Subject: [PATCH 37/41] fix manifest_formatter for source scores --- pyproject.toml | 1 + .../formatters/manifest_formatter.py | 10 +++++++-- tests/formatters/test_manifest_formatter.py | 22 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e4b2ba1..a2f8caf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ max-args = 9 [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = [ "PLR2004", # Magic value comparisons + "PLR0913", # Too many args in func def ] ### Coverage ### diff --git a/src/dbt_score/formatters/manifest_formatter.py b/src/dbt_score/formatters/manifest_formatter.py index 35356b8..0b49bd7 100644 --- a/src/dbt_score/formatters/manifest_formatter.py +++ b/src/dbt_score/formatters/manifest_formatter.py @@ -28,6 +28,12 @@ def project_evaluated(self, score: Score) -> None: """Callback when a project has been evaluated.""" manifest = copy.copy(self._manifest_loader.raw_manifest) for evaluable_id, evaluable_score in self._evaluable_scores.items(): - manifest["nodes"][evaluable_id]["meta"]["score"] = evaluable_score.value - manifest["nodes"][evaluable_id]["meta"]["badge"] = evaluable_score.badge + if evaluable_id.startswith("model"): + model_manifest = manifest["nodes"][evaluable_id] + model_manifest["meta"]["score"] = evaluable_score.value + model_manifest["meta"]["badge"] = evaluable_score.badge + if evaluable_id.startswith("source"): + source_manifest = manifest["sources"][evaluable_id] + source_manifest["meta"]["score"] = evaluable_score.value + source_manifest["meta"]["badge"] = evaluable_score.badge print(json.dumps(manifest, indent=2)) diff --git a/tests/formatters/test_manifest_formatter.py b/tests/formatters/test_manifest_formatter.py index e8d5527..a7da2c4 100644 --- a/tests/formatters/test_manifest_formatter.py +++ b/tests/formatters/test_manifest_formatter.py @@ -37,6 +37,8 @@ def test_manifest_formatter_project( manifest_loader, model1, model2, + source1, + source2, rule_severity_low, rule_severity_medium, rule_severity_critical, @@ -58,10 +60,30 @@ def test_manifest_formatter_project( formatter.evaluable_evaluated(model1, result1, Score(5.0, "🚧")) formatter.evaluable_evaluated(model2, result2, Score(10.0, "🥇")) + formatter.evaluable_evaluated(source1, result1, Score(5.0, "🚧")) + formatter.evaluable_evaluated(source2, result2, Score(10.0, "🥇")) formatter.project_evaluated(Score(7.5, "🥉")) + stdout = capsys.readouterr().out new_manifest = json.loads(stdout) assert new_manifest["nodes"]["model.package.model1"]["meta"]["score"] == 5.0 assert new_manifest["nodes"]["model.package.model1"]["meta"]["badge"] == "🚧" assert new_manifest["nodes"]["model.package.model2"]["meta"]["score"] == 10.0 assert new_manifest["nodes"]["model.package.model2"]["meta"]["badge"] == "🥇" + + assert ( + new_manifest["sources"]["source.package.my_source.table1"]["meta"]["score"] + == 5.0 + ) + assert ( + new_manifest["sources"]["source.package.my_source.table1"]["meta"]["badge"] + == "🚧" + ) + assert ( + new_manifest["sources"]["source.package.my_source.table2"]["meta"]["score"] + == 10.0 + ) + assert ( + new_manifest["sources"]["source.package.my_source.table2"]["meta"]["badge"] + == "🥇" + ) From ba16e645289640f20e61d193a6ecaef75a5af8b6 Mon Sep 17 
00:00:00 2001 From: Oliver Tosky Date: Fri, 1 Nov 2024 22:49:00 -0400 Subject: [PATCH 38/41] run prettier on changelog --- CHANGELOG.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6622a94..211565a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,15 +9,14 @@ and this project adheres to ## [Unreleased] - Support linting of sources. -- **Breaking**: Renamed modules: - `dbt_score.model_filter` becomes `dbt_score.rule_filter` -- **Breaking**: Renamed filter class and decorator: - `@model_filter` becomes `@rule_filter` and - `ModelFilter` becomes `RuleFilter`. +- **Breaking**: Renamed modules: `dbt_score.model_filter` becomes + `dbt_score.rule_filter` +- **Breaking**: Renamed filter class and decorator: `@model_filter` becomes + `@rule_filter` and `ModelFilter` becomes `RuleFilter`. - **Breaking**: Config option `model_filter_names` becomes `rule_filter_names`. -- **Breaking**: CLI flag naming fixes: - `--fail_any_model_under` becomes `--fail-any-item-under` and - `--fail_project_under` becomes `--fail-project-under`. +- **Breaking**: CLI flag naming fixes: `--fail_any_model_under` becomes + `--fail-any-item-under` and `--fail_project_under` becomes + `--fail-project-under`. ## [0.7.1] - 2024-11-01 From 5ef7cae41e3d9e68402566f0e0076972174e21ae Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Fri, 1 Nov 2024 22:49:13 -0400 Subject: [PATCH 39/41] fix import --- tests/test_rule.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_rule.py b/tests/test_rule.py index 3413435..ffce2a1 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -2,9 +2,7 @@ import pytest from dbt_score import Model, Rule, RuleViolation, Severity, Source, rule -from dbt_score.rule_filter import rule_filter - -from src.dbt_score.rule_filter import RuleFilter +from dbt_score.rule_filter import RuleFilter, rule_filter def test_rule_decorator_and_class( From e19badc5f39cfcc43dfd30c66bf9339781150109 Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Tue, 12 Nov 2024 00:41:51 -0500 Subject: [PATCH 40/41] address mypy errors --- src/dbt_score/evaluation.py | 5 ++++- src/dbt_score/more_itertools.py | 32 ++++++++++++++++++++++++++++- src/dbt_score/rule.py | 36 +++++++++++++++++++++++---------- src/dbt_score/rule_filter.py | 24 +++++++++++++--------- tests/conftest.py | 4 ++-- 5 files changed, 77 insertions(+), 24 deletions(-) diff --git a/src/dbt_score/evaluation.py b/src/dbt_score/evaluation.py index 98a7081..bb29f03 100644 --- a/src/dbt_score/evaluation.py +++ b/src/dbt_score/evaluation.py @@ -3,7 +3,7 @@ from __future__ import annotations from itertools import chain -from typing import Type +from typing import Type, cast from dbt_score.formatters import Formatter from dbt_score.models import Evaluable, ManifestLoader @@ -57,6 +57,9 @@ def evaluate(self) -> None: for evaluable in chain( self._manifest_loader.models, self._manifest_loader.sources ): + # type inference on elements from `chain` is wonky + # and resolves to superclass HasColumnsMixin + evaluable = cast(Evaluable, evaluable) self.results[evaluable] = {} for rule in rules: try: diff --git a/src/dbt_score/more_itertools.py b/src/dbt_score/more_itertools.py index f91ff6c..e1d09a5 100644 --- a/src/dbt_score/more_itertools.py +++ b/src/dbt_score/more_itertools.py @@ -1,7 +1,37 @@ """Vendored utility functions from https://github.com/more-itertools/more-itertools.""" +from typing import ( + Callable, + Iterable, + Optional, + TypeVar, + overload, +) +_T = TypeVar("_T") 
+_U = TypeVar("_U") -def first_true(iterable, default=None, pred=None): + +@overload +def first_true( + iterable: Iterable[_T], *, pred: Callable[[_T], object] | None = ... +) -> _T | None: + ... + + +@overload +def first_true( + iterable: Iterable[_T], + default: _U, + pred: Callable[[_T], object] | None = ..., +) -> _T | _U: + ... + + +def first_true( + iterable: Iterable[_T], + default: Optional[_U] = None, + pred: Optional[Callable[[_T], object]] = None, +) -> _T | _U | None: """Returns the first true value in the iterable. If no true value is found, returns *default* diff --git a/src/dbt_score/rule.py b/src/dbt_score/rule.py index d05e868..e01ce55 100644 --- a/src/dbt_score/rule.py +++ b/src/dbt_score/rule.py @@ -4,9 +4,17 @@ import typing from dataclasses import dataclass, field from enum import Enum -from typing import Any, Callable, Iterable, Type, TypeAlias, overload - -from dbt_score.models import Evaluable +from typing import ( + Any, + Callable, + Iterable, + Type, + TypeAlias, + cast, + overload, +) + +from dbt_score.models import Evaluable, Model, Source from dbt_score.more_itertools import first_true from dbt_score.rule_filter import RuleFilter @@ -55,7 +63,9 @@ class RuleViolation: message: str | None = None -RuleEvaluationType: TypeAlias = Callable[[Evaluable], RuleViolation | None] +ModelRuleEvaluationType: TypeAlias = Callable[[Model], RuleViolation | None] +SourceRuleEvaluationType: TypeAlias = Callable[[Source], RuleViolation | None] +RuleEvaluationType: TypeAlias = ModelRuleEvaluationType | SourceRuleEvaluationType class Rule: @@ -66,7 +76,7 @@ class Rule: rule_filter_names: list[str] rule_filters: frozenset[RuleFilter] = frozenset() default_config: typing.ClassVar[dict[str, Any]] = {} - resource_type: typing.ClassVar[Evaluable] + resource_type: typing.ClassVar[type[Evaluable]] def __init__(self, rule_config: RuleConfig | None = None) -> None: """Initialize the rule.""" @@ -85,7 +95,7 @@ def __init_subclass__(cls, **kwargs) -> None: # type: ignore cls._validate_rule_filters() @classmethod - def _validate_rule_filters(cls): + def _validate_rule_filters(cls) -> None: for rule_filter in cls.rule_filters: if rule_filter.resource_type != cls.resource_type: raise TypeError( @@ -111,7 +121,8 @@ def _introspect_resource_type(cls) -> Type[Evaluable]: "annotated Model or Source argument." ) - return resource_type_argument.annotation + resource_type = cast(type[Evaluable], resource_type_argument.annotation) + return resource_type def process_config(self, rule_config: RuleConfig) -> None: """Process the rule config.""" @@ -178,7 +189,12 @@ def __hash__(self) -> int: @overload -def rule(__func: RuleEvaluationType) -> Type[Rule]: +def rule(__func: ModelRuleEvaluationType) -> Type[Rule]: + ... + + +@overload +def rule(__func: SourceRuleEvaluationType) -> Type[Rule]: ... @@ -214,9 +230,7 @@ def rule( rule_filters: Set of RuleFilter that filters the items that the rule applies to. 
""" - def decorator_rule( - func: RuleEvaluationType, - ) -> Type[Rule]: + def decorator_rule(func: RuleEvaluationType) -> Type[Rule]: """Decorator function.""" if func.__doc__ is None and description is None: raise AttributeError("Rule must define `description` or `func.__doc__`.") diff --git a/src/dbt_score/rule_filter.py b/src/dbt_score/rule_filter.py index dd4964e..c8e0e46 100644 --- a/src/dbt_score/rule_filter.py +++ b/src/dbt_score/rule_filter.py @@ -2,19 +2,21 @@ import inspect import typing -from typing import Any, Callable, Type, TypeAlias, overload +from typing import Any, Callable, Type, TypeAlias, cast, overload -from dbt_score.models import Evaluable +from dbt_score.models import Evaluable, Model, Source from dbt_score.more_itertools import first_true -FilterEvaluationType: TypeAlias = Callable[[Evaluable], bool] +ModelFilterEvaluationType: TypeAlias = Callable[[Model], bool] +SourceFilterEvaluationType: TypeAlias = Callable[[Source], bool] +FilterEvaluationType: TypeAlias = ModelFilterEvaluationType | SourceFilterEvaluationType class RuleFilter: """The Filter base class.""" description: str - resource_type: typing.ClassVar[Evaluable] + resource_type: typing.ClassVar[type[Evaluable]] def __init__(self) -> None: """Initialize the filter.""" @@ -44,7 +46,8 @@ def _introspect_resource_type(cls) -> Type[Evaluable]: "annotated Model or Source argument." ) - return resource_type_argument.annotation + resource_type = cast(type[Evaluable], resource_type_argument.annotation) + return resource_type def evaluate(self, evaluable: Evaluable) -> bool: """Evaluates the filter.""" @@ -65,7 +68,12 @@ def __hash__(self) -> int: @overload -def rule_filter(__func: FilterEvaluationType) -> Type[RuleFilter]: +def rule_filter(__func: ModelFilterEvaluationType) -> Type[RuleFilter]: + ... + + +@overload +def rule_filter(__func: SourceFilterEvaluationType) -> Type[RuleFilter]: ... @@ -96,9 +104,7 @@ def rule_filter( description: The description of the filter. """ - def decorator_filter( - func: FilterEvaluationType, - ) -> Type[RuleFilter]: + def decorator_filter(func: FilterEvaluationType) -> Type[RuleFilter]: """Decorator function.""" if func.__doc__ is None and description is None: raise AttributeError( diff --git a/tests/conftest.py b/tests/conftest.py index 9022730..fec3163 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -77,7 +77,7 @@ def model2(raw_manifest) -> Model: @fixture -def source1(raw_manifest) -> Model: +def source1(raw_manifest) -> Source: """Source 1.""" return Source.from_node( raw_manifest["sources"]["source.package.my_source.table1"], [] @@ -85,7 +85,7 @@ def source1(raw_manifest) -> Model: @fixture -def source2(raw_manifest) -> Model: +def source2(raw_manifest) -> Source: """Source 2.""" return Source.from_node( raw_manifest["sources"]["source.package.my_source.table2"], [] From 0732e15e2beb63c219440ce30b3a0f0900853a4a Mon Sep 17 00:00:00 2001 From: Oliver Tosky Date: Tue, 12 Nov 2024 10:36:10 -0500 Subject: [PATCH 41/41] mypy ignore Liskov violations on class-based rule/filter --- tests/conftest.py | 12 ++++++------ tests/test_rule.py | 6 +++--- tests/test_rule_filter.py | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fec3163..b984398 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -142,7 +142,7 @@ class ExampleRule(Rule): description = "Description of the rule." 
- def evaluate(self, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: # type: ignore[override] """Evaluate model.""" if model.name == "model1": return RuleViolation(message="Model1 is a violation.") @@ -197,7 +197,7 @@ class ExampleRuleSource(Rule): description = "Description of the rule." - def evaluate(self, source: Source) -> RuleViolation | None: + def evaluate(self, source: Source) -> RuleViolation | None: # type: ignore[override] """Evaluate source.""" if source.name == "table1": return RuleViolation(message="Source1 is a violation.") @@ -328,7 +328,7 @@ def model_class_rule_with_filter() -> Type[Rule]: class SkipModel1(RuleFilter): description = "Filter defined by a class." - def evaluate(self, model: Model) -> bool: + def evaluate(self, model: Model) -> bool: # type: ignore[override] """Skips for model1, passes for model2.""" return model.name != "model1" @@ -336,7 +336,7 @@ class ModelRuleWithFilter(Rule): description = "Filter defined by a class." rule_filters = frozenset({SkipModel1()}) - def evaluate(self, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: # type: ignore[override] return RuleViolation(message="I always fail.") return ModelRuleWithFilter @@ -349,7 +349,7 @@ def source_class_rule_with_filter() -> Type[Rule]: class SkipSource1(RuleFilter): description = "Filter defined by a class." - def evaluate(self, source: Source) -> bool: + def evaluate(self, source: Source) -> bool: # type: ignore[override] """Skips for source1, passes for source2.""" return source.name != "table1" @@ -357,7 +357,7 @@ class SourceRuleWithFilter(Rule): description = "Filter defined by a class." rule_filters = frozenset({SkipSource1()}) - def evaluate(self, source: Source) -> RuleViolation | None: + def evaluate(self, source: Source) -> RuleViolation | None: # type: ignore[override] return RuleViolation(message="I always fail.") return SourceRuleWithFilter diff --git a/tests/test_rule.py b/tests/test_rule.py index ffce2a1..ff9efb5 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -50,7 +50,7 @@ def test_missing_description_rule_class(): class BadRule(Rule): """Bad example rule.""" - def evaluate(self, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: # type: ignore[override] """Evaluate model.""" return None @@ -116,7 +116,7 @@ def source_filter_class(self): class SourceFilter(RuleFilter): description = "Description" - def evaluate(self, source: Source) -> bool: + def evaluate(self, source: Source) -> bool: # type: ignore[override] return False return SourceFilter @@ -147,7 +147,7 @@ class ModelAlwaysPasses(Rule): description = "Description." rule_filters = frozenset([rule_filter]) - def evaluate(self, model: Model) -> RuleViolation | None: + def evaluate(self, model: Model) -> RuleViolation | None: # type: ignore[override] pass assert "Mismatched resource_type on filter" in str(excinfo.value) diff --git a/tests/test_rule_filter.py b/tests/test_rule_filter.py index 51715ea..ae6909e 100644 --- a/tests/test_rule_filter.py +++ b/tests/test_rule_filter.py @@ -38,7 +38,7 @@ def test_class_filter(model1, model2): class OnlyModel1(RuleFilter): description = "Some description." 
- def evaluate(self, model: Model) -> bool: + def evaluate(self, model: Model) -> bool: # type: ignore[override] return model.name == "model1" instance = OnlyModel1() @@ -53,7 +53,7 @@ def test_class_filter_with_sources(source1, source2): class OnlySource1(RuleFilter): description = "Some description." - def evaluate(self, source: Source) -> bool: + def evaluate(self, source: Source) -> bool: # type: ignore[override] return source.name == "table1" instance = OnlySource1() @@ -78,7 +78,7 @@ def test_missing_description_rule_class(): class BadFilter(RuleFilter): """Bad example filter.""" - def evaluate(self, model: Model) -> bool: + def evaluate(self, model: Model) -> bool: # type: ignore[override] """Evaluate filter.""" return True