Skip to content

Commit

Permalink
Merge branch 'main' into fix-ffi-invalid-handle-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
scovich authored Sep 19, 2024
2 parents 8b58968 + 1a66fb9 commit ac51e9d
Show file tree
Hide file tree
Showing 27 changed files with 319 additions and 69 deletions.
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
# Changelog

## [v0.3.1](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.1/) (2024-09-10)

[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.3.0...v0.3.1)

**API Changes**

*Additions*

1. Two new binary expressions, `In` and `NotIn`, as well as a new `Scalar::Array` variant to represent arrays in the expression framework [\#270](https://github.com/delta-incubator/delta-kernel-rs/pull/270). NOTE: the exact API for these expressions is still evolving.

**Implemented enhancements:**

- Enabled more golden table tests [\#301](https://github.com/delta-incubator/delta-kernel-rs/pull/301)

**Fixed bugs:**

- Allow kernel to read tables with invalid `_last_checkpoint` [\#311](https://github.com/delta-incubator/delta-kernel-rs/pull/311)
- List log files with checkpoint hint when constructing latest snapshot (when version requested is `None`) [\#312](https://github.com/delta-incubator/delta-kernel-rs/pull/312)
- Fix incorrect offset value when computing list offsets [\#327](https://github.com/delta-incubator/delta-kernel-rs/pull/327)
- Fix metadata string conversion in default engine arrow conversion [\#328](https://github.com/delta-incubator/delta-kernel-rs/pull/328)

## [v0.3.0](https://github.com/delta-incubator/delta-kernel-rs/tree/v0.3.0/) (2024-08-07)

[Full Changelog](https://github.com/delta-incubator/delta-kernel-rs/compare/v0.2.0...v0.3.0)
Expand Down
28 changes: 14 additions & 14 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,21 @@ keywords = ["deltalake", "delta", "datalake"]
license = "Apache-2.0"
repository = "https://github.com/delta-incubator/delta-kernel-rs"
readme = "README.md"
version = "0.3.0"
version = "0.3.1"

[workspace.dependencies]
arrow = { version = "^52.0" }
arrow-arith = { version = "^52.0" }
arrow-array = { version = "^52.0" }
arrow-buffer = { version = "^52.0" }
arrow-cast = { version = "^52.0" }
arrow-data = { version = "^52.0" }
arrow-ord = { version = "^52.0" }
arrow-json = { version = "^52.0" }
arrow-select = { version = "^52.0" }
arrow-schema = { version = "^52.0" }
parquet = { version = "^52.0", features = ["object_store"] }
object_store = "^0.10.2"
hdfs-native-object-store = "0.11.0"
arrow = { version = "53.0" }
arrow-arith = { version = "53.0" }
arrow-array = { version = "53.0" }
arrow-buffer = { version = "53.0" }
arrow-cast = { version = "53.0" }
arrow-data = { version = "53.0" }
arrow-ord = { version = "53.0" }
arrow-json = { version = "53.0" }
arrow-select = { version = "53.0" }
arrow-schema = { version = "53.0" }
parquet = { version = "53.0", features = ["object_store"] }
object_store = "0.11.0"
hdfs-native-object-store = "0.12.0"
hdfs-native = "0.10.0"
walkdir = "2.5.0"
6 changes: 3 additions & 3 deletions acceptance/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@ thiserror = "1"
url = "2"

[build-dependencies]
ureq = "2.2"
ureq = "2.10"
flate2 = "1.0"
tar = "0.4"

[dev-dependencies]
datatest-stable = "0.2"
test-log = { version = "0.2", default-features = false, features = ["trace"] }
tempfile = "3"
test-case = { version = "3.1.0" }
tokio = { version = "1.39" }
test-case = { version = "3.3.1" }
tokio = { version = "1.40" }
tracing-subscriber = { version = "0.3", default-features = false, features = [
"env-filter",
"fmt",
Expand Down
12 changes: 6 additions & 6 deletions ffi/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ url = "2"
delta_kernel = { path = "../kernel", default-features = false, features = [
"developer-visibility",
] }
delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.3.0" }
delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.3.1" }

# used if we use the default engine to be able to move arrow data into the c-ffi format
arrow-schema = { version = "^52.0", default-features = false, features = [
arrow-schema = { version = "53.0", default-features = false, features = [
"ffi",
], optional = true }
arrow-data = { version = "^52.0", default-features = false, features = [
arrow-data = { version = "53.0", default-features = false, features = [
"ffi",
], optional = true }
arrow-array = { version = "^52.0", default-features = false, optional = true }
arrow-array = { version = "53.0", default-features = false, optional = true }

[build-dependencies]
cbindgen = "0.26.0"
libc = "0.2.147"
cbindgen = "0.27.0"
libc = "0.2.158"

[dev-dependencies]
rand = "0.8.5"
Expand Down
24 changes: 12 additions & 12 deletions kernel/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,28 @@ exclude = ["tests/golden_tables.rs", "tests/golden_data/" ]
all-features = true

[dependencies]
bytes = "1.4"
bytes = "1.7"
chrono = { version = "0.4" }
either = "1.8"
either = "1.13"
fix-hidden-lifetime-bug = "0.2"
indexmap = "2.2.1"
indexmap = "2.5.0"
itertools = "0.13"
lazy_static = "1.4"
roaring = "0.10.1"
lazy_static = "1.5"
roaring = "0.10.6"
serde = { version = "1", features = ["derive", "rc"] }
serde_json = "1"
thiserror = "1"
# only for structured logging
tracing = { version = "0.1", features = ["log"] }
url = "2"
uuid = "1.3.0"
uuid = "1.10.0"
z85 = "3.0.5"

# bring in our derive macros
delta_kernel_derive = { path = "../derive-macros", version = "0.3.0" }
delta_kernel_derive = { path = "../derive-macros", version = "0.3.1" }

# used for developer-visibility
visibility = "0.1.0"
visibility = "0.1.1"

# Used in default engine
arrow-buffer = { workspace = true, optional = true }
Expand All @@ -53,20 +53,20 @@ hdfs-native-object-store = { workspace = true, optional = true }
# Used in default and sync engine
parquet = { workspace = true, optional = true }
# Used for fetching direct urls (like pre-signed urls)
reqwest = { version = "^0.12.0", optional = true }
reqwest = { version = "0.12.7", optional = true }
strum = { version = "0.26", features = ["derive"] }


# optionally used with default engine (though not required)
tokio = { version = "1.39", optional = true, features = ["rt-multi-thread"] }
tokio = { version = "1.40", optional = true, features = ["rt-multi-thread"] }

# Used in integration tests
hdfs-native = { workspace = true, optional = true }
walkdir = { workspace = true, optional = true }

[features]
arrow-conversion = ["arrow-schema"]
arrow-expression = ["arrow-arith", "arrow-array", "arrow-ord", "arrow-schema"]
arrow-expression = ["arrow-arith", "arrow-array", "arrow-buffer", "arrow-ord", "arrow-schema"]
cloud = [
"object_store/aws",
"object_store/azure",
Expand Down Expand Up @@ -109,7 +109,7 @@ integration-test = [
]

[build-dependencies]
rustc_version = "0.4.0"
rustc_version = "0.4.1"

[dev-dependencies]
arrow = { workspace = true, features = ["json", "prettyprint"] }
Expand Down
4 changes: 2 additions & 2 deletions kernel/examples/read-table-multi-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ publish = false

[dependencies]
arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] }
clap = { version = "^4.4", features = ["derive"] }
clap = { version = "4.5", features = ["derive"] }
delta_kernel = { path = "../../../kernel", features = [
"cloud",
"default-engine",
"developer-visibility",
"tokio",
] }
env_logger = "0.11.3"
env_logger = "0.11.5"
itertools = "0.13"
spmc = "0.3.0"
url = "2"
4 changes: 2 additions & 2 deletions kernel/examples/read-table-single-threaded/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ publish = false

[dependencies]
arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] }
clap = { version = "^4.4", features = ["derive"] }
clap = { version = "4.5", features = ["derive"] }
delta_kernel = { path = "../../../kernel", features = [
"cloud",
"default-engine",
"developer-visibility",
"tokio",
] }
env_logger = "0.11.3"
env_logger = "0.11.5"
itertools = "0.13"
url = "2"
36 changes: 34 additions & 2 deletions kernel/src/engine/arrow_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use arrow_schema::{
use itertools::Itertools;

use crate::error::Error;
use crate::schema::{ArrayType, DataType, MapType, PrimitiveType, StructField, StructType};
use crate::schema::{
ArrayType, DataType, MapType, MetadataValue, PrimitiveType, StructField, StructType,
};

pub(crate) const LIST_ARRAY_ROOT: &str = "element";
pub(crate) const MAP_ROOT_DEFAULT: &str = "key_value";
Expand All @@ -32,7 +34,10 @@ impl TryFrom<&StructField> for ArrowField {
let metadata = f
.metadata()
.iter()
.map(|(key, val)| Ok((key.clone(), serde_json::to_string(val)?)))
.map(|(key, val)| match &val {
&MetadataValue::String(val) => Ok((key.clone(), val.clone())),
_ => Ok((key.clone(), serde_json::to_string(val)?)),
})
.collect::<Result<_, serde_json::Error>>()
.map_err(|err| ArrowError::JsonError(err.to_string()))?;

Expand Down Expand Up @@ -250,3 +255,30 @@ impl TryFrom<&ArrowDataType> for DataType {
}
}
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use crate::engine::arrow_conversion::ArrowField;
    use crate::schema::{DataType, StructField};
    use crate::DeltaResult;

    /// A string-valued metadata entry on a kernel `StructField` must show up
    /// unchanged in the metadata of the `ArrowField` it converts to.
    #[test]
    fn test_metadata_string_conversion() -> DeltaResult<()> {
        // One plain-string metadata entry attached to a non-nullable string field.
        let kernel_metadata = HashMap::from([("description", "hello world".to_owned())]);
        let kernel_field =
            StructField::new("name", DataType::STRING, false).with_metadata(kernel_metadata);

        let converted = ArrowField::try_from(&kernel_field)?;

        // Compare as `Option<&str>` so a missing key and a mismatched value both
        // fail the assertion.
        assert_eq!(
            converted.metadata().get("description").map(String::as_str),
            Some("hello world")
        );
        Ok(())
    }
}
21 changes: 0 additions & 21 deletions kernel/src/engine/arrow_get_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,3 @@ impl<'a> GetData<'a> for MapArray {
}
}
}

// Generates one getter per `(method_name, value_type)` pair. Every generated
// method ignores its row index and field name and reports "no value" by
// returning `Ok(None)`.
//
// The expansion names the lifetime `'a` and the `DeltaResult` alias, so it must
// be invoked inside an `impl<'a>` block where both are in scope.
macro_rules! impl_null_get {
    ($(($getter:ident, $value:ty)),*) => {
        $(
            fn $getter(
                &'a self,
                _row_index: usize,
                _field_name: &str,
            ) -> DeltaResult<Option<$value>> {
                Ok(None)
            }
        )*
    };
}

// `GetData` implementation for the unit type. Each getter listed below is
// generated by `impl_null_get!` and returns `Ok(None)` for every row index and
// field name.
// NOTE(review): presumably this lets `()` stand in for a column that has no
// data at all (all nulls) — confirm against the call sites.
impl<'a> GetData<'a> for () {
// One `(method name, value type)` pair per generated getter.
impl_null_get!(
(get_bool, bool),
(get_int, i32),
(get_long, i64),
(get_str, &'a str),
(get_list, ListItem<'a>),
(get_map, MapItem<'a>)
);
}
Loading

0 comments on commit ac51e9d

Please sign in to comment.