diff --git a/kedro-datasets/RELEASE.md b/kedro-datasets/RELEASE.md index daff41362..3eb66261f 100755 --- a/kedro-datasets/RELEASE.md +++ b/kedro-datasets/RELEASE.md @@ -2,17 +2,17 @@ ## Major features and improvements -- Added functionality to save Pandas DataFrame directly to Snowflake, facilitating seemless `.csv` ingestion -- Added Python 3.9, 3.10 and 3.11 support for SnowflakeTableDataset +- Added functionality to save Pandas DataFrame directly to Snowflake, facilitating seamless `.csv` ingestion. +- Added Python 3.9, 3.10 and 3.11 support for SnowflakeTableDataset. +- Changed `ibis.TableDataset` to support passing arguments to `create_table`, `create_view`, and `table` via a `table_args` parameter, which allows users to set catalog/database in a data catalog entry. - Added the following new **experimental** datasets: | Type | Description | Location | | --------------------------------- | ------------------------------------------------------ | ---------------------------------------- | | `databricks.ExternalTableDataset` | A dataset for accessing external tables in Databricks. | `kedro_datasets_experimental.databricks` | - ## Bug fixes and other changes -- Implemented Snowflake's (local testing framework)[https://docs.snowflake.com/en/developer-guide/snowpark/python/testing-locally] for testing purposes +- Implemented Snowflake's [local testing framework](https://docs.snowflake.com/en/developer-guide/snowpark/python/testing-locally) for testing purposes. ## Breaking Changes - Demoted `video.VideoDataset` from core to experimental dataset. 
@@ -23,6 +23,7 @@ Many thanks to the following Kedroids for contributing PRs to this release: - [Thomas d'Hooghe](https://github.com/tdhooghe) - [Minura Punchihewa](https://github.com/MinuraPunchihewa) +- [Mark Druffel](https://github.com/mark-druffel) # Release 5.1.0 diff --git a/kedro-datasets/kedro_datasets/ibis/table_dataset.py b/kedro-datasets/kedro_datasets/ibis/table_dataset.py index 30709d08e..ef80efa5b 100644 --- a/kedro-datasets/kedro_datasets/ibis/table_dataset.py +++ b/kedro-datasets/kedro_datasets/ibis/table_dataset.py @@ -1,4 +1,5 @@ """Provide data loading and saving functionality for Ibis's backends.""" + from __future__ import annotations import warnings @@ -69,6 +70,7 @@ class TableDataset(AbstractDataset[ir.Table, ir.Table]): "materialized": "view", "overwrite": True, } + DEFAULT_TABLE_ARGS: ClassVar[dict[str, Any]] = {} _connections: ClassVar[dict[tuple[tuple[str, str]], BaseBackend]] = {} @@ -79,6 +81,7 @@ def __init__( # noqa: PLR0913 file_format: str | None = None, table_name: str | None = None, connection: dict[str, Any] | None = None, + table_args: dict[str, Any] | None = None, load_args: dict[str, Any] | None = None, save_args: dict[str, Any] | None = None, metadata: dict[str, Any] | None = None, @@ -104,6 +107,8 @@ def __init__( # noqa: PLR0913 table_name: The name of the table or view to read or create. connection: Configuration for connecting to an Ibis backend. If not provided, connect to DuckDB in in-memory mode. + table_args: Additional arguments passed to the Ibis backend's + `create_{materialized}` method and `table` method. load_args: Additional arguments passed to the Ibis backend's `read_{file_format}` method. 
save_args: Additional arguments passed to the Ibis backend's @@ -141,8 +146,13 @@ def __init__( # noqa: PLR0913 self._save_args = deepcopy(self.DEFAULT_SAVE_ARGS) if save_args is not None: + if table_args is not None: + save_args["database"] = table_args.get("database", None) self._save_args.update(save_args) + self._table_args = deepcopy(self.DEFAULT_TABLE_ARGS) + if table_args is not None: + self._table_args.update(table_args) self._materialized = self._save_args.pop("materialized") @property @@ -176,7 +186,7 @@ def load(self) -> ir.Table: reader = getattr(self.connection, f"read_{self._file_format}") return reader(self._filepath, self._table_name, **self._load_args) else: - return self.connection.table(self._table_name) + return self.connection.table(self._table_name, **self._table_args) def save(self, data: ir.Table) -> None: if self._table_name is None: @@ -191,6 +201,7 @@ def _describe(self) -> dict[str, Any]: "file_format": self._file_format, "table_name": self._table_name, "backend": self._connection_config["backend"], + "table_args": self._table_args, "load_args": self._load_args, "save_args": self._save_args, "materialized": self._materialized,