diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 70deb53cc..36cf67320 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -69,6 +69,28 @@ jobs: - name: build docs run: cargo doc + + # When we run cargo { build, clippy } --no-default-features, we want to build/lint the kernel to + # ensure that we can build the kernel without any features enabled. Unfortunately, due to how + # cargo resolves features, if we have a workspace member that depends on the kernel with features + # enabled, the kernel will be compiled with those features (even if we specify + # --no-default-features). + # + # To cope with this, we split build/clippy --no-default-features into two runs: + # 1. build/clippy all packages that depend on the kernel with some features enabled: + # - acceptance + # - test_utils + # - feature_tests + # (and examples) + # - inspect-table + # - read-table-changes + # - read-table-multi-threaded + # - read-table-single-threaded + # 2. build/clippy all packages that only have no-feature kernel dependency + # - delta_kernel + # - delta_kernel_derive + # - delta_kernel_ffi + # - delta_kernel_ffi_macros build: runs-on: ${{ matrix.os }} strategy: @@ -85,8 +107,10 @@ jobs: components: clippy - name: build and lint with clippy run: cargo clippy --benches --tests --all-features -- -D warnings - - name: lint without default features - run: cargo clippy --no-default-features -- -D warnings + - name: lint without default features - packages which depend on kernel with features enabled + run: cargo clippy --workspace --no-default-features --exclude delta_kernel --exclude delta_kernel_ffi --exclude delta_kernel_derive --exclude delta_kernel_ffi_macros -- -D warnings + - name: lint without default features - packages which don't depend on kernel with features enabled + run: cargo clippy --no-default-features --package delta_kernel --package delta_kernel_ffi --package delta_kernel_derive --package delta_kernel_ffi_macros -- -D warnings - name: check kernel builds with default-engine run: cargo build -p feature_tests --features default-engine - name: check kernel builds with default-engine-rustls diff --git a/Cargo.toml b/Cargo.toml index 6158709b9..8523b6ce3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,11 @@ members = [ "test-utils", "feature-tests", ] -# Only check / build main crates by default (check all with `--workspace`) -default-members = ["acceptance", "kernel"] +# note that in addition to the members above, the workspace includes examples: +# - inspect-table +# - read-table-changes +# - read-table-multi-threaded +# - read-table-single-threaded resolver = "2" [workspace.package] diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index 663c22d61..6c94ffba4 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -22,7 +22,6 @@ tracing-core = { version = "0.1", optional = true } tracing-subscriber = { version = "0.3", optional = true, features = [ "json" ] } url = "2" delta_kernel = { path = "../kernel", default-features = false, features = [ - "arrow", "developer-visibility", ] } delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.8.0" } @@ -32,7 +31,6 @@ cbindgen = "0.28" libc = "0.2.158" [dev-dependencies] -delta_kernel = { path = "../kernel", features = ["default-engine", "sync-engine"] } object_store = { workspace = true } rand = "0.8.5" test_utils = { path = "../test-utils" } @@ -42,10 +40,8 @@ trybuild = "1.0" [features] default = ["default-engine"] cloud = ["delta_kernel/cloud"] -default-engine = [ - "delta_kernel/default-engine", -] +default-engine = ["delta_kernel/default-engine", "delta_kernel/arrow"] tracing = [ "tracing-core", "tracing-subscriber" ] -sync-engine = ["delta_kernel/sync-engine"] +sync-engine = ["delta_kernel/sync-engine", "delta_kernel/arrow"] developer-visibility = [] test-ffi = [] diff --git a/ffi/src/engine_data.rs b/ffi/src/engine_data.rs index 01eaaa343..ad9b64644 100644 --- a/ffi/src/engine_data.rs +++ b/ffi/src/engine_data.rs @@ -1,13 +1,18 @@ //! EngineData related ffi code +#[cfg(feature = "default-engine")] use delta_kernel::arrow::array::{ ffi::{FFI_ArrowArray, FFI_ArrowSchema}, ArrayData, StructArray, }; -use delta_kernel::{DeltaResult, EngineData}; +#[cfg(feature = "default-engine")] +use delta_kernel::DeltaResult; +use delta_kernel::EngineData; use std::ffi::c_void; -use crate::{ExclusiveEngineData, ExternResult, IntoExternResult, SharedExternEngine}; +use crate::ExclusiveEngineData; +#[cfg(feature = "default-engine")] +use crate::{ExternResult, IntoExternResult, SharedExternEngine}; use super::handle::Handle; diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs index 9cba0006c..8c8c06a79 100644 --- a/ffi/src/engine_funcs.rs +++ b/ffi/src/engine_funcs.rs @@ -54,6 +54,8 @@ impl Drop for FileReadResultIterator { /// /// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by /// [`free_read_result_iter`]. The visitor function pointer must be non-null. +/// +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] pub unsafe extern "C" fn read_result_next( mut data: Handle, diff --git a/ffi/src/expressions/engine.rs b/ffi/src/expressions/engine.rs index 2e839c50f..14472b0fb 100644 --- a/ffi/src/expressions/engine.rs +++ b/ffi/src/expressions/engine.rs @@ -28,12 +28,14 @@ impl KernelExpressionVisitorState { /// /// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate, /// along with a visitor function that can be invoked to recursively visit the predicate. This -/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the +/// engine state must be valid until the call to [`scan::scan`] returns. Inside that method, the /// kernel allocates visitor state, which becomes the second argument to the predicate visitor /// invocation along with the engine-provided predicate pointer. The visitor state is valid for the /// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and /// kernel each retain ownership of their respective objects, with no need to coordinate memory /// lifetimes with the other. +/// +/// [`scan::scan`]: crate::scan::scan #[repr(C)] pub struct EnginePredicate { pub predicate: *mut c_void, diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index 32db7db58..18ed1bc5a 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -53,8 +53,8 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi /// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed /// to visitor methods /// TODO: Visit type information in struct field and null. This will likely involve using the schema -/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant -/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412 +/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is +/// the relevant issue: #[repr(C)] pub struct EngineExpressionVisitor { /// An opaque engine state pointer diff --git a/ffi/src/handle.rs b/ffi/src/handle.rs index 30b695ecc..6a29cad52 100644 --- a/ffi/src/handle.rs +++ b/ffi/src/handle.rs @@ -88,14 +88,14 @@ mod private { /// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must /// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel /// API call at a time. If thread races are possible, the handle should be protected with a - /// mutex. Due to Rust [reference - /// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references), - /// this requirement applies even for API calls that appear to be read-only (because Rust code - /// always receives the handle as mutable). + /// mutex. Due to Rust [reference rules], this requirement applies even for API calls that + /// appear to be read-only (because Rust code always receives the handle as mutable). /// /// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can /// freely access shared (non-mutable) handles. /// + /// [reference rules]: + /// https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references /// cbindgen:transparent-typedef #[repr(transparent)] pub struct Handle { diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index af8f15edd..8e39b8018 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -81,7 +81,7 @@ impl Iterator for EngineIterator { /// /// Whoever instantiates the struct must ensure it does not outlive the data it points to. The /// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is -/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning +/// to always use the `kernel_string_slice` macro to create string slices, and to avoid returning /// a string slice from a code block or function (since the move risks over-extending its lifetime): /// /// ```ignore @@ -331,6 +331,8 @@ pub unsafe extern "C" fn free_row_indexes(slice: KernelRowIndexArray) { /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] +/// +/// [`get_raw_engine_data`]: crate::engine_data::get_raw_engine_data #[handle_descriptor(target=dyn EngineData, mutable=true)] pub struct ExclusiveEngineData; @@ -353,12 +355,14 @@ pub trait ExternEngine: Send + Sync { #[handle_descriptor(target=dyn ExternEngine, mutable=false)] pub struct SharedExternEngine; +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] struct ExternEngineVtable { // Actual engine instance to use engine: Arc, allocate_error: AllocateErrorFn, } +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl Drop for ExternEngineVtable { fn drop(&mut self) { debug!("dropping engine interface"); @@ -369,6 +373,7 @@ impl Drop for ExternEngineVtable { /// /// Kernel doesn't use any threading or concurrency. If engine chooses to do so, engine is /// responsible for handling any races that could result. +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Send for ExternEngineVtable {} /// # Safety @@ -380,8 +385,10 @@ unsafe impl Send for ExternEngineVtable {} /// Basically, by failing to implement these traits, we forbid the engine from being able to declare /// its thread-safety (because rust assumes it is not threadsafe). By implementing them, we leave it /// up to the engine to enforce thread safety if engine chooses to use threads at all. +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] unsafe impl Sync for ExternEngineVtable {} +#[cfg(any(feature = "default-engine", feature = "sync-engine"))] impl ExternEngine for ExternEngineVtable { fn engine(&self) -> Arc { self.engine.clone() @@ -677,8 +684,11 @@ pub struct StringSliceIterator; /// # Safety /// -/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by -/// [kernel_scan_data_free]. The visitor function pointer must be non-null. +/// The iterator must be valid (returned by [`kernel_scan_data_init`]) and not yet freed by +/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null. +/// +/// [`kernel_scan_data_init`]: crate::scan::kernel_scan_data_init +/// [`free_kernel_scan_data`]: crate::scan::free_kernel_scan_data #[no_mangle] pub unsafe extern "C" fn string_slice_next( data: Handle, diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 367817787..d230c2a49 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -30,8 +30,9 @@ use super::handle::Handle; pub struct SharedScan; /// Drops a scan. +/// /// # Safety -/// Caller is responsible for passing a [valid][Handle#Validity] scan handle. +/// Caller is responsible for passing a valid scan handle. #[no_mangle] pub unsafe extern "C" fn free_scan(scan: Handle) { scan.drop_handle(); @@ -182,6 +183,9 @@ fn kernel_scan_data_init_impl( /// /// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by /// [`free_kernel_scan_data`]. The visitor function pointer must be non-null. +/// +/// [`free_bool_slice`]: crate::free_bool_slice +/// [`free_engine_data`]: crate::free_engine_data #[no_mangle] pub unsafe extern "C" fn kernel_scan_data_next( data: Handle, diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 1c2f3a1d7..20df4a524 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -12,6 +12,6 @@ version.workspace = true release = false [dependencies] -delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] } +delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] } itertools = "0.13.0" object_store = { workspace = true }