From ba37b6279a413c513195c4160379cfee6d52ea84 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Thu, 9 Jan 2025 14:37:49 -0800 Subject: [PATCH] feat: new feature flag `default-engine-rustls` (#572) We had a request to remove our (implicit) dependency on native-tls. The `default-engine` feature flag pulls in `reqwest` with default features which requires native-tls. This PR introduces a new feature flag: `default-engine-rustls` which instead specifies `default-features = false` for `reqwest` enables the same features which are used in `object_store`: `reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "http2"], optional = true } ` ### Details This PR actually introduces two new feature flags: and 'internal' feature flag `default-engine-base` and the new `default-engine-rustls`. The former doesn't work on its own, we throw a compile error if you attempt to use the 'base' feature without either `default-engine` or `default-engine-rustls`: ``` cargo b -p delta_kernel --features default-engine-base Compiling delta_kernel v0.6.0 (/Users/zach.schuermann/dev/delta-kernel-rs2/kernel) error: The default-engine-base feature flag is not meant to be used directly. Please use either default-engine or default-engine-rustls. --> kernel/src/lib.rs:470:1 | 470 | / compile_error!( 471 | | "The default-engine-base feature flag is not meant to be used directly. \ 472 | | Please use either default-engine or default-engine-rustls." 473 | | ); | |_^ ``` Lastly, a new crate `feature-tests` in the workspace was created to ensure that the features work as expected. ### This PR affects the following public APIs New `default-engine-rustls` feature flag. (New private `_default-engine-base` feature flag) ## How was this change tested? New `feature-tests` crate and added to CI. ## Future work 1. this flag isn't exposed in the FFI (just normal `default-engine` is) 2. need to clean up feature flags in general (lots of overlap, etc.) 3. need to add a PresignedUrl test (does not yet exist) --- .github/workflows/build.yml | 4 ++++ Cargo.toml | 1 + feature-tests/Cargo.toml | 16 ++++++++++++++++ feature-tests/src/lib.rs | 12 ++++++++++++ kernel/Cargo.toml | 21 ++++++++++++++++++--- kernel/src/actions/set_transaction.rs | 2 +- kernel/src/engine/mod.rs | 6 +++--- kernel/src/error.rs | 6 +++--- kernel/src/lib.rs | 12 ++++++++++++ 9 files changed, 70 insertions(+), 10 deletions(-) create mode 100644 feature-tests/Cargo.toml create mode 100644 feature-tests/src/lib.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a72f3b33f..a8a24dd07 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -107,6 +107,10 @@ jobs: run: cargo clippy --benches --tests --all-features -- -D warnings - name: lint without default features run: cargo clippy --no-default-features -- -D warnings + - name: check kernel builds with default-engine + run: cargo build -p feature_tests --features default-engine + - name: check kernel builds with default-engine-rustls + run: cargo build -p feature_tests --features default-engine-rustls test: runs-on: ${{ matrix.os }} strategy: diff --git a/Cargo.toml b/Cargo.toml index ead0064ca..220ab18c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "kernel", "kernel/examples/*", "test-utils", + "feature-tests", ] # Only check / build main crates by default (check all with `--workspace`) default-members = ["acceptance", "kernel"] diff --git a/feature-tests/Cargo.toml b/feature-tests/Cargo.toml new file mode 100644 index 000000000..6f9827e83 --- /dev/null +++ b/feature-tests/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "feature_tests" +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +repository.workspace = true +readme.workspace = true +version.workspace = true + +[dependencies] +delta_kernel = { path = "../kernel" } + +[features] +default-engine = [ "delta_kernel/default-engine" ] +default-engine-rustls = [ "delta_kernel/default-engine-rustls" ] diff --git a/feature-tests/src/lib.rs b/feature-tests/src/lib.rs new file mode 100644 index 000000000..a421d86f9 --- /dev/null +++ b/feature-tests/src/lib.rs @@ -0,0 +1,12 @@ +/// This is a compilation test to ensure that the default-engine feature flags are working +/// correctly. Run (from workspace root) with: +/// 1. `cargo b -p feature_tests --features default-engine-rustls` +/// 2. `cargo b -p feature_tests --features default-engine` +/// These run in our build CI. +pub fn test_default_engine_feature_flags() { + #[cfg(any(feature = "default-engine", feature = "default-engine-rustls"))] + { + #[allow(unused_imports)] + use delta_kernel::engine::default::DefaultEngine; + } +} diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index b5cb30634..459c49177 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -54,7 +54,7 @@ hdfs-native-object-store = { workspace = true, optional = true } # Used in default and sync engine parquet = { workspace = true, optional = true } # Used for fetching direct urls (like pre-signed urls) -reqwest = { version = "0.12.7", optional = true } +reqwest = { version = "0.12.8", default-features = false, optional = true } strum = { version = "0.26", features = ["derive"] } @@ -76,7 +76,10 @@ cloud = [ "hdfs-native-object-store", ] default = [] -default-engine = [ + +# this is an 'internal' feature flag which has all the shared bits from default-engine and +# default-engine-rustls +default-engine-base = [ "arrow-conversion", "arrow-expression", "arrow-array", @@ -89,12 +92,24 @@ default-engine = [ "object_store", "parquet/async", "parquet/object_store", - "reqwest", "tokio", "uuid/v4", "uuid/fast-rng", ] +# the default-engine use the reqwest crate with default features which uses native-tls. if you want +# to instead use rustls, use 'default-engine-rustls' which has no native-tls dependency +default-engine = [ + "default-engine-base", + "reqwest/default", +] + +default-engine-rustls = [ + "default-engine-base", + "reqwest/rustls-tls-native-roots", + "reqwest/http2", +] + developer-visibility = [] sync-engine = [ "arrow-cast", diff --git a/kernel/src/actions/set_transaction.rs b/kernel/src/actions/set_transaction.rs index 914280ede..89c389d3d 100644 --- a/kernel/src/actions/set_transaction.rs +++ b/kernel/src/actions/set_transaction.rs @@ -80,7 +80,7 @@ impl SetTransactionScanner { } } -#[cfg(all(test, feature = "default-engine"))] +#[cfg(all(test, feature = "sync-engine"))] mod tests { use std::path::PathBuf; diff --git a/kernel/src/engine/mod.rs b/kernel/src/engine/mod.rs index fdeca558a..284844cd1 100644 --- a/kernel/src/engine/mod.rs +++ b/kernel/src/engine/mod.rs @@ -7,11 +7,11 @@ pub(crate) mod arrow_conversion; #[cfg(all( feature = "arrow-expression", - any(feature = "default-engine", feature = "sync-engine") + any(feature = "default-engine-base", feature = "sync-engine") ))] pub mod arrow_expression; -#[cfg(feature = "default-engine")] +#[cfg(feature = "default-engine-base")] pub mod default; #[cfg(feature = "sync-engine")] @@ -25,7 +25,7 @@ macro_rules! declare_modules { }; } -#[cfg(any(feature = "default-engine", feature = "sync-engine"))] +#[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] declare_modules!( (pub, arrow_data), (pub, parquet_row_group_skipping), diff --git a/kernel/src/error.rs b/kernel/src/error.rs index d3e5e53c9..e3230aeb9 100644 --- a/kernel/src/error.rs +++ b/kernel/src/error.rs @@ -27,7 +27,7 @@ pub enum Error { }, /// An error performing operations on arrow data - #[cfg(any(feature = "default-engine", feature = "sync-engine"))] + #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error(transparent)] Arrow(arrow_schema::ArrowError), @@ -75,7 +75,7 @@ pub enum Error { #[error("Object store path error: {0}")] ObjectStorePath(#[from] object_store::path::Error), - #[cfg(feature = "default-engine")] + #[cfg(any(feature = "default-engine", feature = "default-engine-rustls"))] #[error("Reqwest Error: {0}")] Reqwest(#[from] reqwest::Error), @@ -303,7 +303,7 @@ from_with_backtrace!( (std::io::Error, IOError) ); -#[cfg(any(feature = "default-engine", feature = "sync-engine"))] +#[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] impl From for Error { fn from(value: arrow_schema::ArrowError) -> Self { Self::Arrow(value).with_backtrace() diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 1d6902d86..f27907bcd 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -456,3 +456,15 @@ pub trait Engine: AsAny { /// Get the connector provided [`ParquetHandler`]. fn get_parquet_handler(&self) -> Arc; } + +// we have an 'internal' feature flag: default-engine-base, which is actually just the shared +// pieces of default-engine and default-engine-rustls. the crate can't compile with _only_ +// default-engine-base, so we give a friendly error here. +#[cfg(all( + feature = "default-engine-base", + not(any(feature = "default-engine", feature = "default-engine-rustls",)) +))] +compile_error!( + "The default-engine-base feature flag is not meant to be used directly. \ + Please use either default-engine or default-engine-rustls." +);