Skip to content

Commit 47d447c

Browse files
authored
(fix): MotherDuck config should set SaaS mode at the end (#446)
* make sure to set saas_mode after the MD token is set
* generalize how config settings are parsed from input
* add tests to confirm that SaaS mode also works when I attach a MD database
* pin DuckDB to 1.1.1 for MD tests
1 parent acf622d commit 47d447c

File tree

9 files changed

+251
-159
lines changed

9 files changed

+251
-159
lines changed

dbt/adapters/duckdb/environments/motherduck.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def motherduck_saas_mode(self, handle: DuckDBConnectionWrapper):
2323
# Get SaaS mode from DuckDB config
2424
con = handle.cursor()
2525
(motherduck_saas_mode,) = con.sql(MOTHERDUCK_SAAS_MODE_QUERY).fetchone()
26-
if motherduck_saas_mode.lower() in ["1", "true"]:
26+
if str(motherduck_saas_mode).lower() in ["1", "true"]:
2727
self._motherduck_saas_mode = True
2828
return True
2929
return False

dbt/adapters/duckdb/plugins/glue.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _add_partition_columns(
216216
# Remove columns from StorageDescriptor if they match with partition columns to avoid duplicate columns
217217
for p_column in partition_columns:
218218
table_def["StorageDescriptor"]["Columns"] = [
219-
column
219+
column # type: ignore
220220
for column in table_def["StorageDescriptor"]["Columns"]
221221
if not (column["Name"] == p_column["Name"] and column["Type"] == p_column["Type"])
222222
]

dbt/adapters/duckdb/plugins/motherduck.py

+53-45
Original file line numberDiff line numberDiff line change
@@ -10,72 +10,80 @@
1010
from dbt.adapters.duckdb.credentials import DuckDBCredentials
1111
from dbt.version import __version__
1212

13-
TOKEN = "token"
14-
MOTHERDUCK_TOKEN = "motherduck_token"
1513
CUSTOM_USER_AGENT = "custom_user_agent"
1614
MOTHERDUCK_EXT = "motherduck"
17-
MOTHERDUCK_CONFIG_OPTIONS = [MOTHERDUCK_TOKEN]
15+
# MotherDuck config options, in order in which they need to be set
16+
# (SaaS mode is last because it locks other config options)
17+
MOTHERDUCK_CONFIG_OPTIONS = [
18+
"motherduck_token",
19+
"motherduck_attach_mode",
20+
"motherduck_saas_mode",
21+
]
1822

1923

2024
class Plugin(BasePlugin):
2125
def initialize(self, plugin_config: Dict[str, Any]):
2226
self._config = plugin_config
23-
self._token = self.token_from_config(plugin_config)
27+
28+
@staticmethod
29+
def get_config_from_path(path):
30+
return {key: value[0] for key, value in parse_qs(urlparse(path).query).items()}
31+
32+
@staticmethod
33+
def get_md_config_settings(config):
34+
# Get MotherDuck config settings
35+
md_config = {}
36+
for name in MOTHERDUCK_CONFIG_OPTIONS:
37+
for key in [
38+
name,
39+
name.replace("motherduck_", ""),
40+
name.upper(),
41+
name.replace("motherduck_", "").upper(),
42+
]:
43+
if key in config:
44+
md_config[name] = config[key]
45+
46+
# Sort values (SaaS mode should be set last)
47+
return dict(
48+
sorted(
49+
md_config.items(),
50+
key=lambda x: MOTHERDUCK_CONFIG_OPTIONS.index(x[0]),
51+
)
52+
)
2453

2554
def configure_connection(self, conn: DuckDBPyConnection):
2655
conn.load_extension(MOTHERDUCK_EXT)
2756
# If a MotherDuck database is in attachments,
2857
# set config options *before* attaching
2958
if self.creds is not None and self.creds.is_motherduck_attach:
30-
# Check if the config options are specified in the path
59+
config = {}
60+
61+
# add config options specified in the path
3162
for attachment in self.creds.motherduck_attach:
32-
parsed = urlparse(attachment.path)
33-
qs = parse_qs(parsed.query)
34-
for KEY in MOTHERDUCK_CONFIG_OPTIONS:
35-
value = qs.get(KEY)
36-
if value:
37-
conn.execute(f"SET {KEY} = '{value[0]}'")
38-
# If config options are specified via plugin config, set them here
39-
if self._config:
40-
conn.execute(f"SET {MOTHERDUCK_TOKEN} = '{self._token}'")
41-
elif self.creds.settings:
42-
if MOTHERDUCK_TOKEN in self.creds.settings:
43-
token = self.creds.settings.pop(MOTHERDUCK_TOKEN)
44-
conn.execute(f"SET {MOTHERDUCK_TOKEN} = '{token}'")
63+
config.update(self.get_config_from_path(attachment.path))
4564

46-
@staticmethod
47-
def token_from_config(config: Dict[str, Any]) -> str:
48-
"""Load the token from the MotherDuck plugin config
49-
If not specified, this returns an empty string
65+
# add config options specified via plugin config
66+
config.update(self._config)
5067

51-
:param str: MotherDuck token
52-
"""
53-
if (
54-
TOKEN in config
55-
or TOKEN.upper() in config
56-
or MOTHERDUCK_TOKEN in config
57-
or MOTHERDUCK_TOKEN.upper() in config
58-
):
59-
token = (
60-
config.get(TOKEN)
61-
or config.get(TOKEN.upper())
62-
or config.get(MOTHERDUCK_TOKEN)
63-
or config.get(MOTHERDUCK_TOKEN.upper())
64-
)
65-
return str(token)
66-
return ""
68+
# add config options specified via settings
69+
if self.creds.settings is not None:
70+
config.update(self.creds.settings)
71+
72+
# set MD config options and remove from settings
73+
for key, value in self.get_md_config_settings(config).items():
74+
conn.execute(f"SET {key} = '{value}'")
75+
if self.creds.settings is not None and key in self.creds.settings:
76+
self.creds.settings.pop(key)
6777

6878
def update_connection_config(self, creds: DuckDBCredentials, config: Dict[str, Any]):
6979
user_agent = f"dbt/{__version__} dbt-duckdb/{__plugin_version__}"
70-
if CUSTOM_USER_AGENT in config:
71-
user_agent = f"{user_agent} {config[CUSTOM_USER_AGENT]}"
7280
settings: Dict[str, Any] = creds.settings or {}
73-
if CUSTOM_USER_AGENT in settings:
74-
user_agent = f"{user_agent} {settings.pop(CUSTOM_USER_AGENT)}"
75-
81+
custom_user_agent = config.get(CUSTOM_USER_AGENT) or settings.pop(CUSTOM_USER_AGENT, None)
82+
if custom_user_agent:
83+
user_agent = f"{user_agent} {custom_user_agent}"
7684
config[CUSTOM_USER_AGENT] = user_agent
7785

7886
# If a user specified MotherDuck config options via the plugin config,
7987
# pass it to the config kwarg in duckdb.connect.
80-
if not creds.is_motherduck_attach and self._token:
81-
config[MOTHERDUCK_TOKEN] = self._token
88+
if not creds.is_motherduck_attach:
89+
config.update(self.get_md_config_settings(self._config))

setup.cfg

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ requires = ["setuptools >= 61.2", "pbr>=1.9"]
4444
glue =
4545
boto3
4646
mypy-boto3-glue
47+
md =
48+
duckdb==1.1.1
4749

4850
[files]
4951
packages =

tests/functional/plugins/test_motherduck_attach.py → tests/functional/plugins/motherduck/test_motherduck_attach.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -90,20 +90,18 @@ def md_sql(self, database_name):
9090

9191
@pytest.fixture(autouse=True)
9292
def run_dbt_scope(self, project, database_name):
93-
# CREATE DATABASE does not work with SaaS mode on duckdb 1.0.0
94-
# This will be fixed in duckdb 1.1.0
95-
# project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
96-
project.run_sql("CREATE OR REPLACE TABLE plugin_table (i integer, j string)")
97-
project.run_sql("INSERT INTO plugin_table (i, j) VALUES (1, 'foo')")
93+
project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
94+
project.run_sql(f"CREATE OR REPLACE TABLE {database_name}.plugin_table (i integer, j string)")
95+
project.run_sql(f"INSERT INTO {database_name}.plugin_table (i, j) VALUES (1, 'foo')")
9896
yield
9997
project.run_sql("DROP VIEW md_table")
10098
project.run_sql("DROP TABLE random_logs_test")
10199
project.run_sql("DROP TABLE summary_of_logs_test")
102-
project.run_sql("DROP TABLE plugin_table")
100+
project.run_sql(f"DROP TABLE {database_name}.plugin_table")
103101
project.run_sql("DROP TABLE python_pyarrow_table_model")
104102

105103
def test_motherduck(self, project):
106-
run_dbt(expect_pass=False)
104+
run_dbt(expect_pass=True)
107105

108106

109107
@pytest.mark.skip_profile("buenavista", "file", "memory")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
from urllib.parse import urlparse
2+
import pytest
3+
from dbt.tests.util import (
4+
run_dbt,
5+
)
6+
from dbt.artifacts.schemas.results import RunStatus
7+
8+
from dbt.adapters.duckdb.environments.motherduck import MOTHERDUCK_SAAS_MODE_QUERY
9+
10+
random_logs_sql = """
11+
{{ config(materialized='table', meta=dict(temp_schema_name='dbt_temp_test')) }}
12+
13+
select
14+
uuid()::varchar as log_id,
15+
'2023-10-01'::timestamp + interval 1 minute * (random() * 20000)::int as dt ,
16+
(random() * 4)::int64 as user_id
17+
from generate_series(1, 10000) g(x)
18+
"""
19+
20+
summary_of_logs_sql = """
21+
{{
22+
config(
23+
materialized='incremental',
24+
meta=dict(temp_schema_name='dbt_temp_test'),
25+
)
26+
}}
27+
28+
select dt::date as dt, user_id, count(1) as c
29+
from {{ ref('random_logs_test') }}
30+
31+
32+
{% if is_incremental() %}
33+
34+
-- this filter will only be applied on an incremental run
35+
-- (uses > to include records whose timestamp occurred since the last run of this model)
36+
where dt > '2023-10-08'::timestamp
37+
38+
{% endif %}
39+
group by all
40+
"""
41+
42+
python_pyarrow_table_model = """
43+
import pyarrow as pa
44+
45+
def model(dbt, con):
46+
return pa.Table.from_pydict({"a": [1,2,3]})
47+
"""
48+
49+
@pytest.mark.skip_profile("buenavista", "file", "memory")
50+
class TestMDPluginSaaSMode:
51+
@pytest.fixture(scope="class")
52+
def profiles_config_update(self, dbt_profile_target):
53+
md_config = {"motherduck_token": dbt_profile_target.get("token"), "motherduck_saas_mode": True}
54+
return {
55+
"test": {
56+
"outputs": {
57+
"dev": {
58+
"type": "duckdb",
59+
"path": dbt_profile_target.get("path", ":memory:") + "?user=1",
60+
"config_options": md_config,
61+
}
62+
},
63+
"target": "dev",
64+
}
65+
}
66+
67+
@pytest.fixture(scope="class")
68+
def models(self, md_sql):
69+
return {
70+
"md_table.sql": md_sql,
71+
"random_logs_test.sql": random_logs_sql,
72+
"summary_of_logs_test.sql": summary_of_logs_sql,
73+
"python_pyarrow_table_model.py": python_pyarrow_table_model,
74+
}
75+
76+
@pytest.fixture(scope="class")
77+
def database_name(self, dbt_profile_target):
78+
return urlparse(dbt_profile_target["path"]).path
79+
80+
@pytest.fixture(scope="class")
81+
def md_sql(self, database_name):
82+
# Reads from a MD database in my test account in the cloud
83+
return f"""
84+
select * FROM {database_name}.main.plugin_table
85+
"""
86+
87+
@pytest.fixture(autouse=True)
88+
def run_dbt_scope(self, project, database_name):
89+
# CREATE DATABASE does not work with SaaS mode on duckdb 1.0.0
90+
# This will be fixed in duckdb 1.1.1
91+
# project.run_sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
92+
project.run_sql(f"CREATE OR REPLACE TABLE {database_name}.plugin_table (i integer, j string)")
93+
project.run_sql(f"INSERT INTO {database_name}.plugin_table (i, j) VALUES (1, 'foo')")
94+
yield
95+
project.run_sql("DROP VIEW md_table")
96+
project.run_sql("DROP TABLE random_logs_test")
97+
project.run_sql("DROP TABLE summary_of_logs_test")
98+
project.run_sql(f"DROP TABLE {database_name}.plugin_table")
99+
100+
def test_motherduck(self, project):
101+
(motherduck_saas_mode,) = project.run_sql(MOTHERDUCK_SAAS_MODE_QUERY, fetch="one")
102+
if str(motherduck_saas_mode).lower() not in ["1", "true"]:
103+
raise ValueError("SaaS mode was not set")
104+
result = run_dbt(expect_pass=False)
105+
expected_msg = "Python models are disabled when MotherDuck SaaS Mode is on."
106+
assert [_res for _res in result.results if _res.status != RunStatus.Success][0].message == expected_msg
107+
108+
109+
@pytest.mark.skip_profile("buenavista", "file", "memory")
110+
class TestMDPluginSaaSModeViaAttach(TestMDPluginSaaSMode):
111+
@pytest.fixture(scope="class")
112+
def profiles_config_update(self, dbt_profile_target):
113+
md_config = {
114+
"token": dbt_profile_target.get("token"),
115+
"saas_mode": 1
116+
}
117+
plugins = [{"module": "motherduck", "config": md_config}]
118+
return {
119+
"test": {
120+
"outputs": {
121+
"dev": {
122+
"type": "duckdb",
123+
"path": ":memory:",
124+
"plugins": plugins,
125+
"attach": [
126+
{
127+
"path": dbt_profile_target.get("path", ":memory:") + "?user=2",
128+
"type": "motherduck"
129+
}
130+
]
131+
}
132+
},
133+
"target": "dev",
134+
}
135+
}
136+
137+
138+
@pytest.mark.skip_profile("buenavista", "file", "memory")
139+
class TestMDPluginSaaSModeViaAttachWithSettings(TestMDPluginSaaSMode):
140+
@pytest.fixture(scope="class")
141+
def profiles_config_update(self, dbt_profile_target):
142+
md_setting = {
143+
"motherduck_token": dbt_profile_target.get("token"),
144+
"motherduck_saas_mode": True
145+
}
146+
return {
147+
"test": {
148+
"outputs": {
149+
"dev": {
150+
"type": "duckdb",
151+
"path": ":memory:",
152+
"attach": [
153+
{
154+
"path": dbt_profile_target.get("path", ":memory:") + "?user=3",
155+
"type": "motherduck"
156+
}
157+
],
158+
"settings": md_setting
159+
}
160+
},
161+
"target": "dev",
162+
}
163+
}
164+
165+
166+
@pytest.mark.skip_profile("buenavista", "file", "memory")
167+
class TestMDPluginSaaSModeViaAttachWithTokenInPath(TestMDPluginSaaSMode):
168+
@pytest.fixture(scope="class")
169+
def profiles_config_update(self, dbt_profile_target):
170+
token = dbt_profile_target.get("token")
171+
qs = f"?motherduck_token={token}&saas_mode=true&user=4"
172+
return {
173+
"test": {
174+
"outputs": {
175+
"dev": {
176+
"type": "duckdb",
177+
"path": ":memory:",
178+
"attach": [
179+
{
180+
"path": dbt_profile_target.get("path", ":memory:") + qs,
181+
"type": "motherduck"
182+
}
183+
]
184+
}
185+
},
186+
"target": "dev",
187+
}
188+
}

0 commit comments

Comments
 (0)