Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into write-proto
Browse files Browse the repository at this point in the history
  • Loading branch information
zachschuermann committed Sep 24, 2024
2 parents 614f272 + b4e5403 commit e970020
Show file tree
Hide file tree
Showing 39 changed files with 830 additions and 89 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/auto-assign-issue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Lets a contributor self-assign an issue by posting a comment whose entire
# body is `take`.
name: Auto assign the issue via `take` comment

on:
  issue_comment:
    types: [created]

# The job only edits issue assignees, so it requests nothing beyond issue write access.
permissions:
  issues: write

jobs:
  auto-assign:
    runs-on: ubuntu-latest
    # `issue_comment` also fires for pull-request comments; skip those and
    # react only to a comment that is exactly `take`.
    if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
    concurrency:
      # Only run one at a time for each user.
      group: ${{ github.actor }}-issue-assign
    steps:
      - env:
          # Workflow context is handed to the script through the environment
          # rather than expanded into the script text, so the values are
          # always treated as data by the shell.
          TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
          ISSUE: ${{ github.event.issue.number }}
          LOGIN: ${{ github.event.comment.user.login }}
        run: |
          ASSIGNEES_URL="https://api.github.com/repos/${REPO}/issues/${ISSUE}/assignees"
          # Probe the per-user assignees endpoint first; only a 204 response
          # leads to an assignment attempt.
          CODE=$(curl -H "Authorization: token ${TOKEN}" -LI "${ASSIGNEES_URL}/${LOGIN}" -o /dev/null -w '%{http_code}\n' -s)
          if [ "$CODE" -eq "204" ]
          then
            echo "Assigning issue ${ISSUE} to ${LOGIN}"
            curl -H "Authorization: token ${TOKEN}" -d "{\"assignees\": [\"${LOGIN}\"]}" "${ASSIGNEES_URL}"
          else
            echo "Cannot assign issue ${ISSUE} to ${LOGIN}"
          fi
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
# Changelog

## [v0.3.1](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.1/) (2024-09-10)

[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.3.0...v0.3.1)

**API Changes**

*Additions*

1. Two new binary expressions: `In` and `NotIn`, as well as a new `Scalar::Array` variant to represent arrays in the expression framework [\#270](https://github.com/delta-incubator/delta-kernel-rs/pull/270) NOTE: exact API for these expressions is still evolving.

**Implemented enhancements:**

- Enabled more golden table tests [\#301](https://github.com/delta-incubator/delta-kernel-rs/pull/301)

**Fixed bugs:**

- Allow kernel to read tables with invalid `_last_checkpoint` [\#311](https://github.com/delta-incubator/delta-kernel-rs/pull/311)
- List log files with checkpoint hint when constructing latest snapshot (when version requested is `None`) [\#312](https://github.com/delta-incubator/delta-kernel-rs/pull/312)
- Fix incorrect offset value when computing list offsets [\#327](https://github.com/delta-incubator/delta-kernel-rs/pull/327)
- Fix metadata string conversion in default engine arrow conversion [\#328](https://github.com/delta-incubator/delta-kernel-rs/pull/328)

## [v0.3.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.0/) (2024-08-07)

[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.2.0...v0.3.0)
Expand Down
25 changes: 25 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# How to contribute to Delta Kernel Rust

Welcome! We'd love to have you contribute to Delta Kernel Rust!

## Did you find a bug?

Create an issue with a reproducible example. Please specify the Rust version, delta-kernel-rs version, the code executed, and the error message.

## Did you create a PR to fix a bug?

Open a pull request and add "Fixes #issue_number" in the PR description.

We appreciate bug fixes - thank you in advance!

## Would you like to add a new feature or change existing code?

If you would like to add a feature or change existing behavior, please make sure to create an issue and get the planned work approved by the core team first!

It's always better to align with the core devs before writing any code.

## Do you have questions about the source code?

Feel free to create an issue or join the [Delta Lake Slack](https://go.delta.io/slack) with questions! We chat in the `#delta-kernel` channel.

Thanks for reading! :heart: :crab:
27 changes: 14 additions & 13 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,21 @@ keywords = ["deltalake", "delta", "datalake"]
license = "Apache-2.0"
repository = "https://github.com/delta-incubator/delta-kernel-rs"
readme = "README.md"
version = "0.3.0"
version = "0.3.1"

[workspace.dependencies]
arrow = { version = "52.2" }
arrow-arith = { version = "52.2" }
arrow-array = { version = "52.2" }
arrow-cast = { version = "52.2" }
arrow-data = { version = "52.2" }
arrow-ord = { version = "52.2" }
arrow-json = { version = "52.2" }
arrow-select = { version = "52.2" }
arrow-schema = { version = "52.2" }
parquet = { version = "52.2", features = ["object_store"] }
object_store = "^0.10.2"
hdfs-native-object-store = "0.11.0"
arrow = { version = "53.0" }
arrow-arith = { version = "53.0" }
arrow-array = { version = "53.0" }
arrow-buffer = { version = "53.0" }
arrow-cast = { version = "53.0" }
arrow-data = { version = "53.0" }
arrow-ord = { version = "53.0" }
arrow-json = { version = "53.0" }
arrow-select = { version = "53.0" }
arrow-schema = { version = "53.0" }
parquet = { version = "53.0", features = ["object_store"] }
object_store = "0.11.0"
hdfs-native-object-store = "0.12.0"
hdfs-native = "0.10.0"
walkdir = "2.5.0"
6 changes: 3 additions & 3 deletions acceptance/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ thiserror = "1"
url = "2"

[build-dependencies]
ureq = "2.2"
ureq = "2.10"
flate2 = "1.0"
tar = "0.4"

[dev-dependencies]
datatest-stable = "0.2"
test-log = { version = "0.2", default-features = false, features = ["trace"] }
tempfile = "3"
test-case = { version = "3.1.0" }
tokio = { version = "1.39" }
test-case = { version = "3.3.1" }
tokio = { version = "1.40" }
tracing-subscriber = { version = "0.3", default-features = false, features = [
"env-filter",
"fmt",
Expand Down
30 changes: 26 additions & 4 deletions derive-macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use proc_macro2::{Ident, TokenStream};
use quote::{quote, quote_spanned};
use syn::spanned::Spanned;
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArguments, Type};
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, Meta, PathArguments, Type};

/// Derive a `delta_kernel::schemas::ToDataType` implementation for the annotated struct. The actual
/// field names in the schema (and therefore of the struct members) are all mandated by the Delta
Expand All @@ -10,7 +10,12 @@ use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArgument
/// Change Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata)
/// action (this macro allows the use of standard rust snake_case, and will convert to the correct
/// delta schema camelCase version).
#[proc_macro_derive(Schema)]
///
/// If a field sets `drop_null_container_values`, it means the underlying data can contain null in
/// the values of the container (i.e. a `key` -> `null` in a `HashMap`). Therefore the schema should
/// mark the value field as nullable, but those mappings will be dropped when converting to an
/// actual rust `HashMap`. Currently this can _only_ be set on `HashMap` fields.
#[proc_macro_derive(Schema, attributes(drop_null_container_values))]
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as DeriveInput);
let struct_ident = input.ident;
Expand All @@ -20,7 +25,7 @@ pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream
#[automatically_derived]
impl crate::actions::schemas::ToDataType for #struct_ident {
fn to_data_type() -> crate::schema::DataType {
use crate::actions::schemas::{ToDataType, GetStructField};
use crate::actions::schemas::{ToDataType, GetStructField, GetNullableContainerStructField};
crate::schema::StructType::new(vec![
#schema_fields
]).into()
Expand Down Expand Up @@ -64,6 +69,14 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
let schema_fields = fields.iter().map(|field| {
let name = field.ident.as_ref().unwrap(); // we know these are named fields
let name = get_schema_name(name);
let have_schema_null = field.attrs.iter().any(|attr| {
// check if we have drop_null_container_values attr
match &attr.meta {
Meta::Path(path) => path.get_ident().is_some_and(|ident| ident == "drop_null_container_values"),
_ => false,
}
});

match field.ty {
Type::Path(ref type_path) => {
let type_path_quoted = type_path.path.segments.iter().map(|segment| {
Expand All @@ -74,7 +87,16 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
_ => panic!("Can only handle <> type path args"),
}
});
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))}
if have_schema_null {
if let Some(first_ident) = type_path.path.segments.first().map(|seg| &seg.ident) {
if first_ident != "HashMap" {
panic!("Can only use drop_null_container_values on HashMap fields, not {first_ident:?}");
}
}
quote_spanned! { field.span() => #(#type_path_quoted),* get_nullable_container_struct_field(stringify!(#name))}
} else {
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))}
}
}
_ => {
panic!("Can't handle type: {:?}", field.ty);
Expand Down
12 changes: 6 additions & 6 deletions ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ url = "2"
delta_kernel = { path = "../kernel", default-features = false, features = [
"developer-visibility",
] }
delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.3.0" }
delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.3.1" }

# used if we use the default engine to be able to move arrow data into the c-ffi format
arrow-schema = { version = "^52.0", default-features = false, features = [
arrow-schema = { version = "53.0", default-features = false, features = [
"ffi",
], optional = true }
arrow-data = { version = "^52.0", default-features = false, features = [
arrow-data = { version = "53.0", default-features = false, features = [
"ffi",
], optional = true }
arrow-array = { version = "^52.0", default-features = false, optional = true }
arrow-array = { version = "53.0", default-features = false, optional = true }

[build-dependencies]
cbindgen = "0.26.0"
libc = "0.2.147"
cbindgen = "0.27.0"
libc = "0.2.158"

[dev-dependencies]
rand = "0.8.5"
Expand Down
2 changes: 2 additions & 0 deletions ffi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ pub enum KernelError {
InvalidDecimalError,
InvalidStructDataError,
InternalError,
InvalidExpression,
}

impl From<Error> for KernelError {
Expand Down Expand Up @@ -372,6 +373,7 @@ impl From<Error> for KernelError {
source,
backtrace: _,
} => Self::from(*source),
Error::InvalidExpressionEvaluation(_) => KernelError::InvalidExpression,
}
}
}
Expand Down
26 changes: 14 additions & 12 deletions kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,31 @@ exclude = ["tests/golden_tables.rs", "tests/golden_data/" ]
all-features = true

[dependencies]
bytes = "1.4"
bytes = "1.7"
chrono = { version = "0.4" }
either = "1.8"
either = "1.13"
fix-hidden-lifetime-bug = "0.2"
indexmap = "2.2.1"
indexmap = "2.5.0"
itertools = "0.13"
lazy_static = "1.4"
roaring = "0.10.1"
lazy_static = "1.5"
roaring = "0.10.6"
serde = { version = "1", features = ["derive", "rc"] }
serde_json = "1"
thiserror = "1"
# only for structured logging
tracing = { version = "0.1", features = ["log"] }
url = "2"
z85 = "3.0.5"
uuid = { version = "1.10.0", features = ["v4", "fast-rng"] }
z85 = "3.0.5"

# bring in our derive macros
delta_kernel_derive = { path = "../derive-macros", version = "0.3.0" }
delta_kernel_derive = { path = "../derive-macros", version = "0.3.1" }

# used for developer-visibility
visibility = "0.1.0"
visibility = "0.1.1"

# Used in default engine
arrow-buffer = { workspace = true, optional = true }
arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] }
arrow-select = { workspace = true, optional = true }
arrow-arith = { workspace = true, optional = true }
Expand All @@ -52,20 +53,20 @@ hdfs-native-object-store = { workspace = true, optional = true }
# Used in default and sync engine
parquet = { workspace = true, optional = true }
# Used for fetching direct urls (like pre-signed urls)
reqwest = { version = "^0.12.0", optional = true }
reqwest = { version = "0.12.7", optional = true }
strum = { version = "0.26", features = ["derive"] }


# optionally used with default engine (though not required)
tokio = { version = "1.39", optional = true, features = ["rt-multi-thread"] }
tokio = { version = "1.40", optional = true, features = ["rt-multi-thread"] }

# Used in integration tests
hdfs-native = { workspace = true, optional = true }
walkdir = { workspace = true, optional = true }

[features]
arrow-conversion = ["arrow-schema"]
arrow-expression = ["arrow-arith", "arrow-array", "arrow-ord", "arrow-schema"]
arrow-expression = ["arrow-arith", "arrow-array", "arrow-buffer", "arrow-ord", "arrow-schema"]
cloud = [
"object_store/aws",
"object_store/azure",
Expand All @@ -78,6 +79,7 @@ default-engine = [
"arrow-conversion",
"arrow-expression",
"arrow-array",
"arrow-buffer",
"arrow-cast",
"arrow-json",
"arrow-schema",
Expand Down Expand Up @@ -107,7 +109,7 @@ integration-test = [
]

[build-dependencies]
rustc_version = "0.4.0"
rustc_version = "0.4.1"

[dev-dependencies]
arrow = { workspace = true, features = ["json", "prettyprint"] }
Expand Down
4 changes: 2 additions & 2 deletions kernel/examples/read-table-multi-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ publish = false

[dependencies]
arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] }
clap = { version = "^4.4", features = ["derive"] }
clap = { version = "4.5", features = ["derive"] }
delta_kernel = { path = "../../../kernel", features = [
"cloud",
"default-engine",
"developer-visibility",
"tokio",
] }
env_logger = "0.11.3"
env_logger = "0.11.5"
itertools = "0.13"
spmc = "0.3.0"
url = "2"
4 changes: 2 additions & 2 deletions kernel/examples/read-table-single-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ publish = false

[dependencies]
arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] }
clap = { version = "^4.4", features = ["derive"] }
clap = { version = "4.5", features = ["derive"] }
delta_kernel = { path = "../../../kernel", features = [
"cloud",
"default-engine",
"developer-visibility",
"tokio",
] }
env_logger = "0.11.3"
env_logger = "0.11.5"
itertools = "0.13"
url = "2"
Loading

0 comments on commit e970020

Please sign in to comment.