Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate SqlFluff to Dbt cli, format, lint ..etc #18

Merged
merged 3 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions opendbt/dbt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ def patch_dbt():
f"Unsupported dbt version {dbt_version}, please make sure dbt version is supported/integrated by opendbt")

# shared code patches
import opendbt.dbt.shared.cli.main
dbt.cli.main.sqlfluff = opendbt.dbt.shared.cli.main.sqlfluff
dbt.cli.main.sqlfluff_lint = opendbt.dbt.shared.cli.main.sqlfluff_lint
dbt.cli.main.sqlfluff_fix = opendbt.dbt.shared.cli.main.sqlfluff_fix
Empty file.
103 changes: 103 additions & 0 deletions opendbt/dbt/shared/cli/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import click
from dbt.cli import requires, params as p
from dbt.cli.main import global_flags, cli

from opendbt.dbt.shared.task.sqlfluff import SqlFluffTasks


# dbt sqlfluff
@cli.group()
@click.pass_context
@global_flags
def sqlfluff(ctx, **kwargs):
    """Lint or fix the SQL code of your project with SQLFluff"""
    # Intentionally empty: this function only registers the ``sqlfluff``
    # command group on dbt's ``cli``; the work is done by the sub-commands
    # attached to it. The docstring above is what click shows as help text.


# dbt sqlfluff lint
@sqlfluff.command("lint")
@click.pass_context
@global_flags
@p.defer
@p.deprecated_defer
@p.exclude
@p.favor_state
@p.deprecated_favor_state
@p.full_refresh
@p.indirect_selection
@p.profile
@p.profiles_dir
@p.project_dir
@p.resource_type
@p.select
@p.selector
@p.show
@p.state
@p.defer_state
@p.deprecated_state
@p.store_failures
@p.target
@p.target_path
@p.threads
@p.vars
@requires.postflight
@requires.preflight
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest(write=False)
def sqlfluff_lint(ctx, **kwargs):
    """Lint the SQL code of your project with SQLFluff and report violations"""
    # ctx.obj is presumably populated by the ``requires.*`` decorators above
    # (flags, runtime config, manifest) -- confirm against dbt.cli.requires.
    task = SqlFluffTasks(
        ctx.obj["flags"],
        ctx.obj["runtime_config"],
        ctx.obj["manifest"],
    )

    # Returns the (results, success) pair; success is False when the lint
    # result carries any errors (see SqlFluffTasks.interpret_results).
    results = task.lint()
    success = task.interpret_results(results)
    return results, success


# dbt sqlfluff fix
@sqlfluff.command("fix")
@click.pass_context
@global_flags
@p.defer
@p.deprecated_defer
@p.exclude
@p.favor_state
@p.deprecated_favor_state
@p.full_refresh
@p.indirect_selection
@p.profile
@p.profiles_dir
@p.project_dir
@p.resource_type
@p.select
@p.selector
@p.show
@p.state
@p.defer_state
@p.deprecated_state
@p.store_failures
@p.target
@p.target_path
@p.threads
@p.vars
@requires.postflight
@requires.preflight
@requires.profile
@requires.project
@requires.runtime_config
@requires.manifest(write=False)
def sqlfluff_fix(ctx, **kwargs):
    """Fix the SQL code of your project in place using SQLFluff"""
    # ctx.obj is presumably populated by the ``requires.*`` decorators above
    # (flags, runtime config, manifest) -- confirm against dbt.cli.requires.
    task = SqlFluffTasks(
        ctx.obj["flags"],
        ctx.obj["runtime_config"],
        ctx.obj["manifest"],
    )

    # Returns the (results, success) pair expected from a dbt CLI command.
    results = task.fix()
    success = task.interpret_results(results)
    return results, success
Empty file.
87 changes: 87 additions & 0 deletions opendbt/dbt/shared/task/sqlfluff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
from datetime import datetime
from pathlib import Path
from typing import Optional

from dbt.config import RuntimeConfig
from dbt.contracts.results import (
CatalogResults,
CatalogArtifact, RunExecutionResult,
)
from dbt.task.compile import CompileTask
from sqlfluff.cli import commands
from sqlfluff.core import Linter, FluffConfig
from sqlfluff.core.linter import LintingResult
from sqlfluff_templater_dbt import DbtTemplater


class SqlFluffTasks(CompileTask):
    """dbt task that runs SQLFluff ``lint`` and ``fix`` over the project root.

    The SQLFluff configuration is loaded from the dbt project root. When the
    configured templater is the dbt templater, it is pointed at that same
    directory before the linter is built.
    """

    def __init__(self, args, config, manifest):
        """Create the task and the SQLFluff linter it uses.

        Args:
            args: forwarded unchanged to ``CompileTask.__init__``.
            config: forwarded unchanged to ``CompileTask.__init__``; its
                ``project_root`` is read via ``self.config`` afterwards.
            manifest: forwarded unchanged to ``CompileTask.__init__``.
        """
        super().__init__(args, config, manifest)

        project_root = self.config.project_root
        self.sqlfluff_config = FluffConfig.from_path(path=project_root)

        # NOTE(review): this reads FluffConfig's private ``_configs`` mapping;
        # it may break with SQLFluff releases that reorganise the config.
        templater_obj = self.sqlfluff_config._configs["core"]["templater_obj"]
        if isinstance(templater_obj, DbtTemplater):
            # Make the dbt templater resolve the project from the dbt project
            # root rather than from wherever the process was started.
            templater_obj.project_root = project_root
            templater_obj.working_dir = project_root
        self.linter = Linter(self.sqlfluff_config)

    def get_result(self, elapsed_time: float, violations: list, num_violations: int):
        """Wrap a SQLFluff outcome into a ``CatalogArtifact``.

        Args:
            elapsed_time: run duration stored on the embedded run result.
            violations: violations stored as the artifact's ``errors``; an
                empty list is stored as ``None``.
            num_violations: when greater than zero, an ``Exception`` describing
                the count is attached to the artifact as ``exception``.

        Returns:
            The artifact built by ``CatalogArtifact.from_results``.
        """
        # One timestamp shared by the run result and the artifact.
        generated_at = datetime.now()
        run_result = RunExecutionResult(
            results=[],
            elapsed_time=elapsed_time,
            generated_at=generated_at,
            args={},
        )
        result = CatalogArtifact.from_results(
            nodes={},
            sources={},
            generated_at=generated_at,
            errors=violations if violations else None,
            compile_results=run_result,
        )
        if num_violations > 0:
            result.exception = Exception(f"Linting {num_violations} errors found!")

        return result

    def lint(self) -> CatalogArtifact:
        """Lint every path under the project root and print the violations.

        Changes the process working directory to the project root as a side
        effect.

        Returns:
            The artifact from ``get_result``; it carries the violations as
            ``errors`` and an ``exception`` when at least one was found.
        """
        project_root = self.config.project_root
        os.chdir(project_root)
        lint_result: LintingResult = self.linter.lint_paths(paths=(project_root,))
        num_violations = lint_result.num_violations()
        result = self.get_result(lint_result.total_time, lint_result.get_violations(), num_violations)
        if num_violations > 0:
            print(f"Linting {num_violations} errors found!")
            for record in lint_result.as_records():
                violations: list = record['violations']
                if not violations:
                    continue
                filepath = Path(record['filepath'])
                try:
                    shown_path = filepath.relative_to(project_root)
                except ValueError:
                    # Path is not located under project_root (e.g. already
                    # relative); report it unchanged instead of crashing.
                    shown_path = filepath
                print(f"File: {shown_path}")
                # NOTE: each violation record is printed as-is. An earlier,
                # disabled variant formatted its 'code', 'start_line_no',
                # 'start_line_pos' and 'description' fields individually.
                for violation in violations:
                    print(f" {violation}")
        return result

    def fix(self) -> CatalogArtifact:
        """Lint every path under the project root with fixes applied.

        Changes the process working directory to the project root as a side
        effect.

        Returns:
            The artifact from ``get_result``. It is always built with no
            violations and a zero count, so any violations remaining after the
            fix run are not reported through it.
        """
        project_root = self.config.project_root
        os.chdir(project_root)
        # Only the linter is used; the formatter returned alongside is ignored.
        lnt, _formatter = commands.get_linter_and_formatter(cfg=self.sqlfluff_config)
        lint_result: LintingResult = lnt.lint_paths(
            paths=(project_root,),
            fix=True,
            apply_fixes=True,
        )
        return self.get_result(lint_result.total_time, [], 0)

    @classmethod
    def interpret_results(cls, results: Optional[CatalogResults]) -> bool:
        """Return True when ``results`` exists and has no (non-empty) ``errors``."""
        if results is None:
            return False
        return not getattr(results, "errors", None)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
include_package_data=True,
license="Apache License 2.0",
test_suite='tests',
install_requires=["dbt-duckdb>=1.6"],
install_requires=["dbt-duckdb>=1.6", "sqlfluff", "sqlfluff-templater-dbt"],
extras_require={
"airflow": ["apache-airflow"],
"test": ["testcontainers>=3.7,<4.9"],
Expand Down
53 changes: 53 additions & 0 deletions tests/resources/dbttest/.sqlfluff
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
[sqlfluff]
templater = dbt
dialect = duckdb
# This change (from jinja to dbt templater) will make linting slower
# because linting will first compile dbt code into data warehouse code.
runaway_limit = 1000
max_line_length = 180
indent_unit = space

[sqlfluff:indentation]
tab_space_size = 4

[sqlfluff:layout:type:comma]
spacing_before = touch
line_position = trailing

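# NOTE: the section that follows uses pyproject.toml-style naming (a "tool."
# prefix, dots between the names and a quoted value). In a .sqlfluff file,
# sections are written as [sqlfluff:rules:...], so it presumably has no effect
# here; the equivalent setting is [sqlfluff:rules:capitalisation.keywords] below.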
[tool.sqlfluff.rules.capitalisation.keywords]
capitalisation_policy = "upper"

# The default configuration for capitalisation rules is "consistent"
# which will auto-detect the setting from the rest of the file. This
# is less desirable in a new project and you may find this (slightly
# more strict) setting more useful.
# Typically we find users rely on syntax highlighting rather than
# capitalisation to distinguish between keywords and identifiers.
# Clearly, if your organisation has already settled on uppercase
# formatting for any of these syntax elements then set them to "upper".
# See https://stackoverflow.com/questions/608196/why-should-i-capitalize-my-sql-keywords-is-there-a-good-reason
[sqlfluff:rules:capitalisation.keywords]
capitalisation_policy = upper
[sqlfluff:rules:capitalisation.identifiers]
capitalisation_policy = upper
[sqlfluff:rules:capitalisation.functions]
extended_capitalisation_policy = upper
# [sqlfluff:rules:capitalisation.literals]
# capitalisation_policy = lower
[sqlfluff:rules:capitalisation.types]
extended_capitalisation_policy = upper

[sqlfluff:rules:aliasing.table]
aliasing = explicit

[sqlfluff:rules:aliasing.column]
aliasing = explicit

[sqlfluff:rules:aliasing.expression]
allow_scalar = False

[sqlfluff:rules:ambiguous.column_references] # Number in group by
group_by_and_order_by_style = implicit
18 changes: 18 additions & 0 deletions tests/test_dbt_sqlfluff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from pathlib import Path
from unittest import TestCase

from opendbt import OpenDbtProject


class TestDbtSqlFluff(TestCase):
    """Smoke tests running the ``sqlfluff`` dbt command on the test project."""

    # Directory holding the test fixtures, next to this test module.
    RESOURCES_DIR = Path(__file__).parent / "resources"
    # dbt project used both as project dir and profiles dir.
    DBTTEST_DIR = RESOURCES_DIR / "dbttest"

    def test_run_sqlfluff_lint(self):
        """Run ``fix`` first, then ``lint``, on the same project object."""
        project = OpenDbtProject(project_dir=self.DBTTEST_DIR, profiles_dir=self.DBTTEST_DIR)
        for subcommand in ("fix", "lint"):
            project.run(command="sqlfluff", args=[subcommand])

    def test_run_sqlfluff_fix(self):
        """Run ``fix`` on its own."""
        project = OpenDbtProject(project_dir=self.DBTTEST_DIR, profiles_dir=self.DBTTEST_DIR)
        project.run(command="sqlfluff", args=["fix"])
Loading