Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,28 @@ jobs:
- name: build docs
run: cargo doc


# When we run cargo { build, clippy } --no-default-features, we want to build/lint the kernel to
# ensure that we can build the kernel without any features enabled. Unfortunately, due to how
# cargo resolves features, if we have a workspace member that depends on the kernel with features
# enabled, the kernel will be compiled with those features (even if we specify
# --no-default-features).
#
# To cope with this, we split build/clippy --no-default-features into two runs:
# 1. build/clippy all packages that depend on the kernel with some features enabled:
# - acceptance
# - test_utils
# - feature_tests
# (and examples)
# - inspect-table
# - read-table-changes
# - read-table-multi-threaded
# - read-table-single-threaded
# 2. build/clippy all packages that only have no-feature kernel dependency
# - delta_kernel
# - delta_kernel_derive
# - delta_kernel_ffi
# - delta_kernel_ffi_macros
build:
runs-on: ${{ matrix.os }}
strategy:
Expand All @@ -85,8 +107,10 @@ jobs:
components: clippy
- name: build and lint with clippy
run: cargo clippy --benches --tests --all-features -- -D warnings
- name: lint without default features
run: cargo clippy --no-default-features -- -D warnings
- name: lint without default features - packages which depend on kernel with features enabled
run: cargo clippy --workspace --no-default-features --exclude delta_kernel --exclude delta_kernel_ffi --exclude delta_kernel_derive --exclude delta_kernel_ffi_macros -- -D warnings
- name: lint without default features - packages which don't depend on kernel with features enabled
run: cargo clippy --no-default-features --package delta_kernel --package delta_kernel_ffi --package delta_kernel_derive --package delta_kernel_ffi_macros -- -D warnings
- name: check kernel builds with default-engine
run: cargo build -p feature_tests --features default-engine
- name: check kernel builds with default-engine-rustls
Expand Down
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ members = [
"test-utils",
"feature-tests",
]
# Only check / build main crates by default (check all with `--workspace`)
default-members = ["acceptance", "kernel"]
# note that in addition to the members above, the workspace includes examples:
# - inspect-table
# - read-table-changes
# - read-table-multi-threaded
# - read-table-single-threaded
resolver = "2"

[workspace.package]
Expand Down
8 changes: 2 additions & 6 deletions ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ tracing-core = { version = "0.1", optional = true }
tracing-subscriber = { version = "0.3", optional = true, features = [ "json" ] }
url = "2"
delta_kernel = { path = "../kernel", default-features = false, features = [
"arrow",
"developer-visibility",
] }
delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.8.0" }
Expand All @@ -32,7 +31,6 @@ cbindgen = "0.28"
libc = "0.2.158"

[dev-dependencies]
delta_kernel = { path = "../kernel", features = ["default-engine", "sync-engine"] }
object_store = { workspace = true }
rand = "0.8.5"
test_utils = { path = "../test-utils" }
Expand All @@ -42,10 +40,8 @@ trybuild = "1.0"
[features]
default = ["default-engine"]
cloud = ["delta_kernel/cloud"]
default-engine = [
"delta_kernel/default-engine",
]
default-engine = ["delta_kernel/default-engine", "delta_kernel/arrow"]
tracing = [ "tracing-core", "tracing-subscriber" ]
sync-engine = ["delta_kernel/sync-engine"]
sync-engine = ["delta_kernel/sync-engine", "delta_kernel/arrow"]
developer-visibility = []
test-ffi = []
9 changes: 7 additions & 2 deletions ffi/src/engine_data.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
//! EngineData related ffi code

#[cfg(feature = "default-engine")]
use delta_kernel::arrow::array::{
ffi::{FFI_ArrowArray, FFI_ArrowSchema},
ArrayData, StructArray,
};
use delta_kernel::{DeltaResult, EngineData};
#[cfg(feature = "default-engine")]
use delta_kernel::DeltaResult;
use delta_kernel::EngineData;
use std::ffi::c_void;

use crate::{ExclusiveEngineData, ExternResult, IntoExternResult, SharedExternEngine};
use crate::ExclusiveEngineData;
#[cfg(feature = "default-engine")]
use crate::{ExternResult, IntoExternResult, SharedExternEngine};

use super::handle::Handle;

Expand Down
2 changes: 2 additions & 0 deletions ffi/src/engine_funcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ impl Drop for FileReadResultIterator {
///
/// The iterator must be valid (returned by [`read_parquet_file`]) and not yet freed by
/// [`free_read_result_iter`]. The visitor function pointer must be non-null.
///
/// [`free_engine_data`]: crate::free_engine_data
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ohh nm, i guess it creates the link in the docs?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep!

#[no_mangle]
pub unsafe extern "C" fn read_result_next(
mut data: Handle<ExclusiveFileReadResultIterator>,
Expand Down
4 changes: 3 additions & 1 deletion ffi/src/expressions/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ impl KernelExpressionVisitorState {
///
/// When invoking [`scan::scan`], The engine provides a pointer to the (engine's native) predicate,
/// along with a visitor function that can be invoked to recursively visit the predicate. This
/// engine state must be valid until the call to `scan::scan` returns. Inside that method, the
/// engine state must be valid until the call to [`scan::scan`] returns. Inside that method, the
/// kernel allocates visitor state, which becomes the second argument to the predicate visitor
/// invocation along with the engine-provided predicate pointer. The visitor state is valid for the
/// lifetime of the predicate visitor invocation. Thanks to this double indirection, engine and
/// kernel each retain ownership of their respective objects, with no need to coordinate memory
/// lifetimes with the other.
///
/// [`scan::scan`]: crate::scan::scan
#[repr(C)]
pub struct EnginePredicate {
pub predicate: *mut c_void,
Expand Down
4 changes: 2 additions & 2 deletions ffi/src/expressions/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ type VisitUnaryFn = extern "C" fn(data: *mut c_void, sibling_list_id: usize, chi
/// WARNING: The visitor MUST NOT retain internal references to string slices or binary data passed
/// to visitor methods
/// TODO: Visit type information in struct field and null. This will likely involve using the schema
/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is the relevant
/// issue: https://github.com/delta-io/delta-kernel-rs/issues/412
/// visitor. Note that struct literals are currently in flux, and may change significantly. Here is
/// the relevant issue: <https://github.com/delta-io/delta-kernel-rs/issues/412>
#[repr(C)]
pub struct EngineExpressionVisitor {
/// An opaque engine state pointer
Expand Down
8 changes: 4 additions & 4 deletions ffi/src/handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,14 @@ mod private {
/// Additionally, in keeping with the [`Send`] contract, multi-threaded external code must
/// enforce mutual exclusion -- no mutable handle should ever be passed to more than one kernel
/// API call at a time. If thread races are possible, the handle should be protected with a
/// mutex. Due to Rust [reference
/// rules](https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references),
/// this requirement applies even for API calls that appear to be read-only (because Rust code
/// always receives the handle as mutable).
/// mutex. Due to Rust [reference rules], this requirement applies even for API calls that
/// appear to be read-only (because Rust code always receives the handle as mutable).
///
/// NOTE: Because the underlying type is always [`Sync`], multi-threaded external code can
/// freely access shared (non-mutable) handles.
///
/// [reference rules]:
/// https://doc.rust-lang.org/book/ch04-02-references-and-borrowing.html#the-rules-of-references
/// cbindgen:transparent-typedef
#[repr(transparent)]
pub struct Handle<H: HandleDescriptor> {
Expand Down
16 changes: 13 additions & 3 deletions ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ impl Iterator for EngineIterator {
///
/// Whoever instantiates the struct must ensure it does not outlive the data it points to. The
/// compiler cannot help us here, because raw pointers don't have lifetimes. A good rule of thumb is
/// to always use the [`kernel_string_slice`] macro to create string slices, and to avoid returning
/// to always use the `kernel_string_slice` macro to create string slices, and to avoid returning
/// a string slice from a code block or function (since the move risks over-extending its lifetime):
///
/// ```ignore
Expand Down Expand Up @@ -331,6 +331,8 @@ pub unsafe extern "C" fn free_row_indexes(slice: KernelRowIndexArray) {
/// an opaque struct that encapsulates data read by an engine. this handle can be passed back into
/// some kernel calls to operate on the data, or can be converted into the raw data as read by the
/// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`]
///
/// [`get_raw_engine_data`]: crate::engine_data::get_raw_engine_data
#[handle_descriptor(target=dyn EngineData, mutable=true)]
pub struct ExclusiveEngineData;

Expand All @@ -353,12 +355,14 @@ pub trait ExternEngine: Send + Sync {
#[handle_descriptor(target=dyn ExternEngine, mutable=false)]
pub struct SharedExternEngine;

#[cfg(any(feature = "default-engine", feature = "sync-engine"))]
struct ExternEngineVtable {
// Actual engine instance to use
engine: Arc<dyn Engine>,
allocate_error: AllocateErrorFn,
}

#[cfg(any(feature = "default-engine", feature = "sync-engine"))]
impl Drop for ExternEngineVtable {
fn drop(&mut self) {
debug!("dropping engine interface");
Expand All @@ -369,6 +373,7 @@ impl Drop for ExternEngineVtable {
///
/// Kernel doesn't use any threading or concurrency. If engine chooses to do so, engine is
/// responsible for handling any races that could result.
#[cfg(any(feature = "default-engine", feature = "sync-engine"))]
unsafe impl Send for ExternEngineVtable {}

/// # Safety
Expand All @@ -380,8 +385,10 @@ unsafe impl Send for ExternEngineVtable {}
/// Basically, by failing to implement these traits, we forbid the engine from being able to declare
/// its thread-safety (because rust assumes it is not threadsafe). By implementing them, we leave it
/// up to the engine to enforce thread safety if engine chooses to use threads at all.
#[cfg(any(feature = "default-engine", feature = "sync-engine"))]
unsafe impl Sync for ExternEngineVtable {}

#[cfg(any(feature = "default-engine", feature = "sync-engine"))]
impl ExternEngine for ExternEngineVtable {
fn engine(&self) -> Arc<dyn Engine> {
self.engine.clone()
Expand Down Expand Up @@ -677,8 +684,11 @@ pub struct StringSliceIterator;

/// # Safety
///
/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by
/// [kernel_scan_data_free]. The visitor function pointer must be non-null.
/// The iterator must be valid (returned by [`kernel_scan_data_init`]) and not yet freed by
/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null.
///
/// [`kernel_scan_data_init`]: crate::scan::kernel_scan_data_init
/// [`free_kernel_scan_data`]: crate::scan::free_kernel_scan_data
#[no_mangle]
pub unsafe extern "C" fn string_slice_next(
data: Handle<StringSliceIterator>,
Expand Down
6 changes: 5 additions & 1 deletion ffi/src/scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ use super::handle::Handle;
pub struct SharedScan;

/// Drops a scan.
///
/// # Safety
/// Caller is responsible for passing a [valid][Handle#Validity] scan handle.
/// Caller is responsible for passing a valid scan handle.
#[no_mangle]
pub unsafe extern "C" fn free_scan(scan: Handle<SharedScan>) {
scan.drop_handle();
Expand Down Expand Up @@ -182,6 +183,9 @@ fn kernel_scan_data_init_impl(
///
/// The iterator must be valid (returned by [kernel_scan_data_init]) and not yet freed by
/// [`free_kernel_scan_data`]. The visitor function pointer must be non-null.
///
/// [`free_bool_slice`]: crate::free_bool_slice
/// [`free_engine_data`]: crate::free_engine_data
#[no_mangle]
pub unsafe extern "C" fn kernel_scan_data_next(
data: Handle<SharedScanDataIterator>,
Expand Down
2 changes: 1 addition & 1 deletion test-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ version.workspace = true
release = false

[dependencies]
delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] }
delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] }
itertools = "0.13.0"
object_store = { workspace = true }
Loading