From a3a76718abbb37843504341a5b84a21871ebe856 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 14 Feb 2025 16:06:00 +0100 Subject: [PATCH 01/38] fix: get prefix from offset path Signed-off-by: Robert Pack --- ffi/src/lib.rs | 8 ++------ kernel/src/engine/default/filesystem.rs | 27 +++++++++++++------------ kernel/src/engine/default/mod.rs | 9 ++++----- kernel/src/log_segment/tests.rs | 1 - kernel/src/snapshot.rs | 3 --- kernel/tests/read.rs | 14 ++----------- kernel/tests/write.rs | 2 +- 7 files changed, 23 insertions(+), 41 deletions(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index caf04ef2c8..0675d86bae 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -718,7 +718,7 @@ impl Default for ReferenceSet { #[cfg(test)] mod tests { use delta_kernel::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; - use object_store::{memory::InMemory, path::Path}; + use object_store::memory::InMemory; use test_utils::{actions_to_string, add_commit, TestAction}; use super::*; @@ -792,11 +792,7 @@ mod tests { actions_to_string(vec![TestAction::Metadata]), ) .await?; - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let engine = engine_to_handle(Arc::new(engine), allocate_err); let path = "memory:///"; diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 5606a28d02..baf3826164 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -14,7 +14,6 @@ use crate::{DeltaResult, Error, FileMeta, FileSlice, FileSystemClient}; pub struct ObjectStoreFileSystemClient { inner: Arc, has_ordered_listing: bool, - table_root: Path, task_executor: Arc, readahead: usize, } @@ -23,13 +22,11 @@ impl ObjectStoreFileSystemClient { pub(crate) fn new( store: Arc, has_ordered_listing: bool, - table_root: Path, 
task_executor: Arc, ) -> Self { Self { inner: store, has_ordered_listing, - table_root, task_executor, readahead: 10, } @@ -49,8 +46,14 @@ impl FileSystemClient for ObjectStoreFileSystemClient { ) -> DeltaResult>>> { let url = path.clone(); let offset = Path::from(path.path()); - // TODO properly handle table prefix - let prefix = self.table_root.child("_delta_log"); + let parts = offset.parts().collect_vec(); + if parts.is_empty() { + return Err(Error::generic(format!( + "Offset path must not be a root directory. Got: '{}'", + url.as_str() + ))); + } + let prefix = Path::from_iter(parts[..parts.len() - 1].iter().cloned()); let store = self.inner.clone(); @@ -192,11 +195,9 @@ mod tests { let mut url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); let client = ObjectStoreFileSystemClient::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); @@ -229,11 +230,10 @@ mod tests { store.put(&name, data.clone().into()).await.unwrap(); let table_root = Url::parse("memory:///").expect("valid url"); - let prefix = Path::from_url_path(table_root.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); let files: Vec<_> = engine .get_file_system_client() - .list_from(&table_root) + .list_from(&table_root.join("_delta_log/0").unwrap()) .unwrap() .try_collect() .unwrap(); @@ -260,11 +260,12 @@ mod tests { let url = Url::from_directory_path(tmp.path()).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from_url_path(url.path()).expect("Couldn't get path"); - let engine = DefaultEngine::new(store, prefix, Arc::new(TokioBackgroundExecutor::new())); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); let client = 
engine.get_file_system_client(); - let files = client.list_from(&Url::parse("file://").unwrap()).unwrap(); + let files = client + .list_from(&url.join("_delta_log/0").unwrap()) + .unwrap(); let mut len = 0; for (file, expected) in files.zip(expected_names.iter()) { assert!( diff --git a/kernel/src/engine/default/mod.rs b/kernel/src/engine/default/mod.rs index d89cf29cd0..3b3bc094bd 100644 --- a/kernel/src/engine/default/mod.rs +++ b/kernel/src/engine/default/mod.rs @@ -10,7 +10,7 @@ use std::collections::HashMap; use std::sync::Arc; use self::storage::parse_url_opts; -use object_store::{path::Path, DynObjectStore}; +use object_store::DynObjectStore; use url::Url; use self::executor::TaskExecutor; @@ -60,8 +60,8 @@ impl DefaultEngine { V: Into, { // table root is the path of the table in the ObjectStore - let (store, table_root) = parse_url_opts(table_root, options)?; - Ok(Self::new(Arc::new(store), table_root, task_executor)) + let (store, _table_root) = parse_url_opts(table_root, options)?; + Ok(Self::new(Arc::new(store), task_executor)) } /// Create a new [`DefaultEngine`] instance @@ -71,7 +71,7 @@ impl DefaultEngine { /// - `store`: The object store to use. /// - `table_root_path`: The root path of the table within storage. /// - `task_executor`: Used to spawn async IO tasks. See [executor::TaskExecutor]. - pub fn new(store: Arc, table_root: Path, task_executor: Arc) -> Self { + pub fn new(store: Arc, task_executor: Arc) -> Self { // HACK to check if we're using a LocalFileSystem from ObjectStore. We need this because // local filesystem doesn't return a sorted list by default. 
Although the `object_store` // crate explicitly says it _does not_ return a sorted listing, in practice all the cloud @@ -97,7 +97,6 @@ impl DefaultEngine { file_system: Arc::new(ObjectStoreFileSystemClient::new( store.clone(), !is_local, - table_root, task_executor.clone(), )), json: Arc::new(DefaultJsonHandler::new( diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 5db1c45811..0ab9ee6c0c 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -98,7 +98,6 @@ fn build_log_with_paths_and_checkpoint( let client = ObjectStoreFileSystemClient::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index f198b9080b..e3b1bf9f0c 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -249,11 +249,9 @@ mod tests { let url = url::Url::from_directory_path(path).unwrap(); let store = Arc::new(LocalFileSystem::new()); - let prefix = Path::from(url.path()); let client = ObjectStoreFileSystemClient::new( store, false, // don't have ordered listing - prefix, Arc::new(TokioBackgroundExecutor::new()), ); let cp = read_last_checkpoint(&client, &url).unwrap(); @@ -291,7 +289,6 @@ mod tests { let client = ObjectStoreFileSystemClient::new( store, false, // don't have ordered listing - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), ); let url = Url::parse("memory:///valid/").expect("valid url"); diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 9d5d243147..12ba577875 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -58,7 +58,6 @@ async fn single_commit_two_add_files() -> Result<(), Box> let location = Url::parse("memory:///")?; let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from("/"), Arc::new(TokioBackgroundExecutor::new()), )); @@ -113,11 +112,7 @@ async fn two_commits() -> Result<(), Box> { .await?; let location = 
Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch.clone(), batch]; @@ -171,11 +166,7 @@ async fn remove_action() -> Result<(), Box> { .await?; let location = Url::parse("memory:///").unwrap(); - let engine = DefaultEngine::new( - storage.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); let table = Table::new(location); let expected_data = vec![batch]; @@ -249,7 +240,6 @@ async fn stats() -> Result<(), Box> { let location = Url::parse("memory:///").unwrap(); let engine = Arc::new(DefaultEngine::new( storage.clone(), - Path::from(""), Arc::new(TokioBackgroundExecutor::new()), )); diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index 2ee6dfdd5b..6335be7dd8 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -46,7 +46,7 @@ fn setup( let table_root_path = Path::from(format!("{base_path}{table_name}")); let url = Url::parse(&format!("{base_url}{table_root_path}/")).unwrap(); let executor = Arc::new(TokioBackgroundExecutor::new()); - let engine = DefaultEngine::new(Arc::clone(&storage), table_root_path, executor); + let engine = DefaultEngine::new(Arc::clone(&storage), executor); (storage, engine, url) } From 5d7a754a0feb5757ed367b479c1a49122282223b Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 15 Feb 2025 01:05:36 +0100 Subject: [PATCH 02/38] test: add acceptance tests for list_from Signed-off-by: Robert Pack --- kernel/src/engine/default/filesystem.rs | 20 ++++--- kernel/src/engine/default/mod.rs | 17 ++++++ kernel/src/engine/mod.rs | 77 +++++++++++++++++++++++++ kernel/src/engine/sync/fs_client.rs | 2 +- kernel/src/engine/sync/json.rs | 4 ++ 
kernel/src/engine/sync/mod.rs | 14 +++++ kernel/src/lib.rs | 2 +- 7 files changed, 126 insertions(+), 10 deletions(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index baf3826164..aa22df44d4 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -46,14 +46,18 @@ impl FileSystemClient for ObjectStoreFileSystemClient { ) -> DeltaResult>>> { let url = path.clone(); let offset = Path::from(path.path()); - let parts = offset.parts().collect_vec(); - if parts.is_empty() { - return Err(Error::generic(format!( - "Offset path must not be a root directory. Got: '{}'", - url.as_str() - ))); - } - let prefix = Path::from_iter(parts[..parts.len() - 1].iter().cloned()); + let prefix = if url.path().ends_with('/') { + offset.clone() + } else { + let parts = offset.parts().collect_vec(); + if parts.is_empty() { + return Err(Error::generic(format!( + "Offset path must not be a root directory. Got: '{}'", + url.as_str() + ))); + } + Path::from_iter(parts[..parts.len() - 1].iter().cloned()) + }; let store = self.inner.clone(); diff --git a/kernel/src/engine/default/mod.rs b/kernel/src/engine/default/mod.rs index 3b3bc094bd..db4588eb00 100644 --- a/kernel/src/engine/default/mod.rs +++ b/kernel/src/engine/default/mod.rs @@ -157,3 +157,20 @@ impl Engine for DefaultEngine { self.parquet.clone() } } + +#[cfg(test)] +mod tests { + use super::executor::tokio::TokioBackgroundExecutor; + use super::*; + use crate::engine::tests::test_arrow_engine; + use object_store::local::LocalFileSystem; + + #[test] + fn test_default_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = Url::from_directory_path(tmp.path()).unwrap(); + let store = Arc::new(LocalFileSystem::new()); + let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); + test_arrow_engine(&engine, &url); + } +} diff --git a/kernel/src/engine/mod.rs b/kernel/src/engine/mod.rs index 8ea07384a0..e962ee5a3f 
100644 --- a/kernel/src/engine/mod.rs +++ b/kernel/src/engine/mod.rs @@ -27,3 +27,80 @@ pub(crate) mod arrow_get_data; pub(crate) mod ensure_data_types; #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] pub mod parquet_row_group_skipping; + +#[cfg(test)] +mod tests { + use arrow_array::{RecordBatch, StringArray}; + use arrow_schema::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use itertools::Itertools; + use object_store::path::Path; + use std::sync::Arc; + use url::Url; + + use crate::engine::arrow_data::ArrowEngineData; + use crate::{Engine, EngineData}; + + use test_utils::delta_path_for_version; + + fn test_list_from_should_sort_and_filter( + engine: &dyn Engine, + base_url: &Url, + engine_data: impl Fn() -> Box, + ) { + let json = engine.get_json_handler(); + let get_data = || Box::new(std::iter::once(Ok(engine_data()))); + + let expected_names: Vec = (1..4) + .map(|i| delta_path_for_version(i, "json")) + .collect_vec(); + + for i in expected_names.iter().rev() { + let path = base_url.join(i.as_ref()).unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + } + let path = base_url.join("other").unwrap(); + json.write_json_file(&path, get_data(), false).unwrap(); + + let fs = engine.get_file_system_client(); + + // list files after an offset + let test_url = base_url.join(expected_names[0].as_ref()).unwrap(); + let files: Vec<_> = fs.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len() - 1); + for (file, expected) in files.iter().zip(expected_names.iter().skip(1)) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + + let test_url = base_url + .join(delta_path_for_version(0, "json").as_ref()) + .unwrap(); + let files: Vec<_> = fs.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + + // list files inside a directory / key prefix + let test_url = base_url.join("_delta_log/").unwrap(); + 
let files: Vec<_> = fs.list_from(&test_url).unwrap().try_collect().unwrap(); + assert_eq!(files.len(), expected_names.len()); + for (file, expected) in files.iter().zip(expected_names.iter()) { + assert_eq!(file.location, base_url.join(expected.as_ref()).unwrap()); + } + } + + fn get_arrow_data() -> Box { + let schema = Arc::new(ArrowSchema::new(vec![Field::new( + "dog", + ArrowDataType::Utf8, + true, + )])); + let data = RecordBatch::try_new( + schema.clone(), + vec![Arc::new(StringArray::from(vec!["remi", "wilson"]))], + ) + .unwrap(); + Box::new(ArrowEngineData::new(data)) + } + + pub(crate) fn test_arrow_engine(engine: &dyn Engine, base_url: &Url) { + test_list_from_should_sort_and_filter(engine, base_url, get_arrow_data); + } +} diff --git a/kernel/src/engine/sync/fs_client.rs b/kernel/src/engine/sync/fs_client.rs index 9577b1499b..050d2ae853 100644 --- a/kernel/src/engine/sync/fs_client.rs +++ b/kernel/src/engine/sync/fs_client.rs @@ -39,7 +39,7 @@ impl FileSystemClient for SyncFilesystemClient { let all_ents: Vec<_> = std::fs::read_dir(path_to_read)? 
.filter(|ent_res| { match (ent_res, min_file_name) { - (Ok(ent), Some(min_file_name)) => ent.file_name() >= *min_file_name, + (Ok(ent), Some(min_file_name)) => ent.file_name() > *min_file_name, _ => true, // Keep unfiltered and/or error entries } }) diff --git a/kernel/src/engine/sync/json.rs b/kernel/src/engine/sync/json.rs index 3d33b10251..04078653d0 100644 --- a/kernel/src/engine/sync/json.rs +++ b/kernel/src/engine/sync/json.rs @@ -65,6 +65,10 @@ impl JsonHandler for SyncJsonHandler { ))); }; + if !parent.exists() { + std::fs::create_dir_all(parent)?; + } + // write data to tmp file let mut tmp_file = NamedTempFile::new_in(parent)?; let buf = to_json_bytes(data)?; diff --git a/kernel/src/engine/sync/mod.rs b/kernel/src/engine/sync/mod.rs index f637ec1056..5ab95c1b15 100644 --- a/kernel/src/engine/sync/mod.rs +++ b/kernel/src/engine/sync/mod.rs @@ -97,3 +97,17 @@ where .map(|data| Ok(Box::new(ArrowEngineData::new(data??.into())) as _)); Ok(Box::new(result)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::engine::tests::test_arrow_engine; + + #[test] + fn test_sync_engine() { + let tmp = tempfile::tempdir().unwrap(); + let url = url::Url::from_directory_path(tmp.path()).unwrap(); + let engine = SyncEngine::new(); + test_arrow_engine(&engine, &url); + } +} diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 8dde21afe5..cb4fb50d11 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -43,7 +43,7 @@ //! //! Delta Kernel needs to perform some basic operations against file systems like listing and //! reading files. These interactions are encapsulated in the [`FileSystemClient`] trait. -//! Implementors must take care that all assumptions on the behavior if the functions - like sorted +//! Implementers must take care that all assumptions on the behavior if the functions - like sorted //! results - are respected. //! //! 
## Reading log and data files From 8340575d3043d798644fa081f3fbd66596fec5e7 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 15 Feb 2025 01:10:09 +0100 Subject: [PATCH 03/38] docs: update list_from docs Signed-off-by: Robert Pack --- kernel/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index cb4fb50d11..413aaad93c 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -346,8 +346,11 @@ pub trait ExpressionHandler: AsAny { /// file system where the Delta table is present. Connector implementation of /// this trait can hide filesystem specific details from Delta Kernel. pub trait FileSystemClient: AsAny { - /// List the paths in the same directory that are lexicographically greater or equal to + /// List the paths in the same directory that are lexicographically greater than /// (UTF-8 sorting) the given `path`. The result should also be sorted by the file name. + /// + /// If the path is directory-like (ends with '/'), the result should contain + /// all the files in the directory. 
fn list_from(&self, path: &Url) -> DeltaResult>>>; From f08143a435c6c0d6ef930cd6b52de7e50d480224 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 15 Feb 2025 01:17:34 +0100 Subject: [PATCH 04/38] test: fix tests Signed-off-by: Robert Pack --- kernel/src/engine/sync/fs_client.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/src/engine/sync/fs_client.rs b/kernel/src/engine/sync/fs_client.rs index 050d2ae853..9c0d1b80df 100644 --- a/kernel/src/engine/sync/fs_client.rs +++ b/kernel/src/engine/sync/fs_client.rs @@ -106,7 +106,7 @@ mod tests { writeln!(f, "null")?; f.flush()?; - let url_path = tmp_dir.path().join(get_json_filename(1)); + let url_path = tmp_dir.path().join(get_json_filename(0)); let url = Url::from_file_path(url_path).unwrap(); let files: Vec<_> = client.list_from(&url)?.try_collect()?; @@ -137,11 +137,11 @@ mod tests { // i+1 in index because we started at 0001 in the listing assert_eq!( file?.location.to_file_path().unwrap().to_str().unwrap(), - expected[i + 1].to_str().unwrap() + expected[i + 2].to_str().unwrap() ); file_count += 1; } - assert_eq!(file_count, 2); + assert_eq!(file_count, 1); let url_path = tmp_dir.path().join(""); let url = Url::from_file_path(url_path).unwrap(); From c042df68dfdfde6a19c77fd4e0ded2c2d6fc58ad Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 15 Feb 2025 01:47:42 +0100 Subject: [PATCH 05/38] fix: try using path from url to fix windows failures Signed-off-by: Robert Pack --- kernel/src/engine/default/filesystem.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index aa22df44d4..ebdc2a3de8 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -45,7 +45,7 @@ impl FileSystemClient for ObjectStoreFileSystemClient { path: &Url, ) -> DeltaResult>>> { let url = path.clone(); - let offset = Path::from(path.path()); + let offset = 
Path::from_url_path(path.path())?; let prefix = if url.path().ends_with('/') { offset.clone() } else { From c0b028e7882e98e5cfd1240858d3e51d6c5ba0de Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 15 Feb 2025 01:54:56 +0100 Subject: [PATCH 06/38] fix: try using path from url to fix windows failures Signed-off-by: Robert Pack --- kernel/src/engine/default/json.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/engine/default/json.rs b/kernel/src/engine/default/json.rs index ab296e12a1..a5608fcfc1 100644 --- a/kernel/src/engine/default/json.rs +++ b/kernel/src/engine/default/json.rs @@ -102,7 +102,7 @@ impl JsonHandler for DefaultJsonHandler { let buffer = to_json_bytes(data)?; // Put if absent let store = self.store.clone(); // cheap Arc - let path = Path::from(path.path()); + let path = Path::from_url_path(path.path())?; let path_str = path.to_string(); self.task_executor .block_on(async move { From 7c722656dd5b59b6c5368a69ec376702cbcca800 Mon Sep 17 00:00:00 2001 From: Robert Pack <42610831+roeap@users.noreply.github.com> Date: Wed, 19 Mar 2025 11:51:04 -0700 Subject: [PATCH 07/38] Update kernel/src/engine/default/filesystem.rs Co-authored-by: Ryan Johnson --- kernel/src/engine/default/filesystem.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index ebdc2a3de8..435fb669d9 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -50,13 +50,13 @@ impl FileSystemClient for ObjectStoreFileSystemClient { offset.clone() } else { let parts = offset.parts().collect_vec(); - if parts.is_empty() { + if parts.pop().is_empty() { return Err(Error::generic(format!( "Offset path must not be a root directory. 
Got: '{}'", url.as_str() ))); } - Path::from_iter(parts[..parts.len() - 1].iter().cloned()) + Path::from_iter(parts) }; let store = self.inner.clone(); From 24129f7f8b555eae011e55df7f3011ad4ee9ae3c Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 20 Feb 2025 05:40:15 +1300 Subject: [PATCH 08/38] fix: Handle predicates on non-nullable columns without stats (#700) Fixes #698 ## What changes are proposed in this pull request? Updates the `DataSkippingFilter` to treat all columns as nullable for the purpose of parsing stats, as suggested in https://github.com/delta-io/delta-kernel-rs/issues/698#issuecomment-2658229733. This is particularly important for partition columns, which won't have values present in stats. But stats are also only usually stored for the first 32 columns, so we shouldn't rely on stats being present for non-partition fields either. ## How was this change tested? I've added a new unit test. I've also tested building duckdb-delta with this change (cherry-picked onto 0.6.1) and verified that the code in #698 now works. --- kernel/src/scan/data_skipping.rs | 33 +++++++- kernel/tests/read.rs | 127 ++++++++++++++++++++++++++++++- test-utils/src/lib.rs | 12 ++- 3 files changed, 166 insertions(+), 6 deletions(-) diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs index 11181863dc..0575747447 100644 --- a/kernel/src/scan/data_skipping.rs +++ b/kernel/src/scan/data_skipping.rs @@ -75,6 +75,28 @@ impl DataSkippingFilter { let (predicate, referenced_schema) = physical_predicate?; debug!("Creating a data skipping filter for {:#?}", predicate); + // Convert all fields into nullable, as stats may not be available for all columns + // (and usually aren't for partition columns). + struct NullableStatsTransform; + impl<'a> SchemaTransform<'a> for NullableStatsTransform { + fn transform_struct_field( + &mut self, + field: &'a StructField, + ) -> Option> { + use Cow::*; + let field = match self.transform(&field.data_type)? 
{ + Borrowed(_) if field.is_nullable() => Borrowed(field), + data_type => Owned(StructField { + name: field.name.clone(), + data_type: data_type.into_owned(), + nullable: true, + metadata: field.metadata.clone(), + }), + }; + Some(field) + } + } + // Convert a min/max stats schema into a nullcount schema (all leaf fields are LONG) struct NullCountStatsTransform; impl<'a> SchemaTransform<'a> for NullCountStatsTransform { @@ -85,14 +107,19 @@ impl DataSkippingFilter { Some(Cow::Owned(PrimitiveType::Long)) } } - let nullcount_schema = NullCountStatsTransform + + let stats_schema = NullableStatsTransform .transform_struct(&referenced_schema)? .into_owned(); + + let nullcount_schema = NullCountStatsTransform + .transform_struct(&stats_schema)? + .into_owned(); let stats_schema = Arc::new(StructType::new([ StructField::nullable("numRecords", DataType::LONG), StructField::nullable("nullCount", nullcount_schema), - StructField::nullable("minValues", referenced_schema.clone()), - StructField::nullable("maxValues", referenced_schema), + StructField::nullable("minValues", stats_schema.clone()), + StructField::nullable("maxValues", stats_schema), ])); // Skipping happens in several steps: diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 12ba577875..3f89875430 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -15,10 +15,12 @@ use delta_kernel::scan::state::{transform_to_logical, visit_scan_files, DvInfo, use delta_kernel::scan::Scan; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::{Engine, FileMeta, Table}; +use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; +use parquet::file::properties::{EnabledStatistics, WriterProperties}; use test_utils::{ actions_to_string, add_commit, generate_batch, generate_simple_batch, into_record_batch, - record_batch_to_bytes, IntoArray, TestAction, METADATA, + record_batch_to_bytes, record_batch_to_bytes_with_props, IntoArray, TestAction, METADATA, }; use url::Url; 
@@ -896,6 +898,129 @@ fn with_predicate_and_removes() -> Result<(), Box> { Ok(()) } +#[tokio::test] +async fn predicate_on_non_nullable_partition_column() -> Result<(), Box> { + // Test for https://github.com/delta-io/delta-kernel-rs/issues/698 + let batch = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[\"id\"]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":false,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":["id"],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + format!(r#"{{"add":{{"path":"id=1/{PARQUET_FILE1}","partitionValues":{{"id":"1"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + format!(r#"{{"add":{{"path":"id=2/{PARQUET_FILE2}","partitionValues":{{"id":"2"}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + ]; + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from("id=1").child(PARQUET_FILE1), + record_batch_to_bytes(&batch).into(), + ) + .await?; + storage + .put( + &Path::from("id=2").child(PARQUET_FILE2), + record_batch_to_bytes(&batch).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = 
Arc::new(DefaultEngine::new( + storage.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("id"), 2); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let mut result_batch = into_record_batch(engine_data?.raw_data?); + let _ = result_batch.remove_column(result_batch.schema().index_of("id")?); + assert_eq!(&batch, &result_batch); + files_scanned += 1; + } + // Partition pruning is not yet implemented, so we still read the data for both partitions + assert_eq!(2, files_scanned); + Ok(()) +} + +#[tokio::test] +async fn predicate_on_non_nullable_column_missing_stats() -> Result<(), Box> +{ + let batch_1 = generate_batch(vec![("val", vec!["a", "b", "c"].into_array())])?; + let batch_2 = generate_batch(vec![("val", vec!["d", "e", "f"].into_array())])?; + + let storage = Arc::new(InMemory::new()); + let actions = [ + r#"{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}"#.to_string(), + r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}}"#.to_string(), + r#"{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"val\",\"type\":\"string\",\"nullable\":false,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#.to_string(), + // Add one file with stats, one file without + format!(r#"{{"add":{{"path":"{PARQUET_FILE1}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{\"val\":0}},\"minValues\":{{\"val\":\"a\"}},\"maxValues\":{{\"val\":\"c\"}}}}"}}}}"#), + 
format!(r#"{{"add":{{"path":"{PARQUET_FILE2}","partitionValues":{{}},"size":0,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":3,\"nullCount\":{{}},\"minValues\":{{}},\"maxValues\":{{}}}}"}}}}"#), + ]; + + // Disable writing Parquet statistics so these cannot be used for pruning row groups + let writer_props = WriterProperties::builder() + .set_statistics_enabled(EnabledStatistics::None) + .build(); + + add_commit(storage.as_ref(), 0, actions.iter().join("\n")).await?; + storage + .put( + &Path::from(PARQUET_FILE1), + record_batch_to_bytes_with_props(&batch_1, writer_props.clone()).into(), + ) + .await?; + storage + .put( + &Path::from(PARQUET_FILE2), + record_batch_to_bytes_with_props(&batch_2, writer_props).into(), + ) + .await?; + + let location = Url::parse("memory:///")?; + let table = Table::new(location); + + let engine = Arc::new(DefaultEngine::new( + storage.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + )); + let snapshot = Arc::new(table.snapshot(engine.as_ref(), None)?); + + let predicate = Expression::eq(column_expr!("val"), "g"); + let scan = snapshot + .scan_builder() + .with_predicate(Arc::new(predicate)) + .build()?; + + let stream = scan.execute(engine)?; + + let mut files_scanned = 0; + for engine_data in stream { + let result_batch = into_record_batch(engine_data?.raw_data?); + assert_eq!(&batch_2, &result_batch); + files_scanned += 1; + } + // One file is scanned as stats are missing so we don't know the predicate isn't satisfied + assert_eq!(1, files_scanned); + + Ok(()) +} + #[test] fn short_dv() -> Result<(), Box> { let expected = vec![ diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index 2605bea562..0aeee887d7 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -37,9 +37,17 @@ pub fn actions_to_string(actions: Vec) -> String { /// convert a RecordBatch into a vector of bytes. 
We can't use `From` since these are both foreign /// types pub fn record_batch_to_bytes(batch: &RecordBatch) -> Vec { - let mut data: Vec = Vec::new(); let props = WriterProperties::builder().build(); - let mut writer = ArrowWriter::try_new(&mut data, batch.schema(), Some(props)).unwrap(); + record_batch_to_bytes_with_props(batch, props) +} + +pub fn record_batch_to_bytes_with_props( + batch: &RecordBatch, + writer_properties: WriterProperties, +) -> Vec { + let mut data: Vec = Vec::new(); + let mut writer = + ArrowWriter::try_new(&mut data, batch.schema(), Some(writer_properties)).unwrap(); writer.write(batch).expect("Writing batch"); // writer must be closed to write footer writer.close().unwrap(); From 8b1dffe43978c9039b961a9caa4d5b0dc9ea44d2 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Wed, 19 Feb 2025 08:53:06 -0800 Subject: [PATCH 09/38] feat!(ffi): new visit_schema FFI and rename old visit_schema to visit_snapshot_schema (#683) ## What changes are proposed in this pull request? When given a schema (e.g. in `global_scan_state`) the engine needs a way to visit this schema. This introduces a new API `visit_schema` to allow engines to visit any schema over FFI. An API called `visit_schema` previously existed but visited the schema of a given _snapshot_; this has now been renamed to `visit_snapshot_schema`. ### This PR affects the following public APIs Renamed `visit_schema` to `visit_snapshot_schema` and now `visit_schema` takes `SharedSchema` as an argument instead of a snapshot. ## How was this change tested? 
updated read_table test --- ffi/examples/read-table/schema.h | 2 +- ffi/src/schema.rs | 27 ++++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 8c29675a64..1a8960b2c5 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -273,7 +273,7 @@ void print_schema(SharedSnapshot* snapshot) .visit_timestamp = visit_timestamp, .visit_timestamp_ntz = visit_timestamp_ntz, }; - uintptr_t schema_list_id = visit_schema(snapshot, &visitor); + uintptr_t schema_list_id = visit_snapshot_schema(snapshot, &visitor); #ifdef VERBOSE printf("Schema returned in list %" PRIxPTR "\n", schema_list_id); #endif diff --git a/ffi/src/schema.rs b/ffi/src/schema.rs index 23da22bc33..f033ac8d99 100644 --- a/ffi/src/schema.rs +++ b/ffi/src/schema.rs @@ -1,6 +1,6 @@ use std::os::raw::c_void; -use crate::scan::CStringMap; +use crate::scan::{CStringMap, SharedSchema}; use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot}; use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; @@ -201,11 +201,32 @@ pub struct EngineSchemaVisitor { /// /// Caller is responsible for passing a valid snapshot handle and schema visitor. #[no_mangle] -pub unsafe extern "C" fn visit_schema( +pub unsafe extern "C" fn visit_snapshot_schema( snapshot: Handle, visitor: &mut EngineSchemaVisitor, ) -> usize { let snapshot = unsafe { snapshot.as_ref() }; + visit_schema_impl(snapshot.schema(), visitor) +} + +/// Visit the given `schema` using the provided `visitor`. See the documentation of +/// [`EngineSchemaVisitor`] for a description of how this visitor works. +/// +/// This method returns the id of the list allocated to hold the top level schema columns. +/// +/// # Safety +/// +/// Caller is responsible for passing a valid schema handle and schema visitor. 
+#[no_mangle] +pub unsafe extern "C" fn visit_schema( + schema: Handle, + visitor: &mut EngineSchemaVisitor, +) -> usize { + let schema = unsafe { schema.as_ref() }; + visit_schema_impl(schema, visitor) +} + +fn visit_schema_impl(schema: &StructType, visitor: &mut EngineSchemaVisitor) -> usize { // Visit all the fields of a struct and return the list of children fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize { let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len()); @@ -316,5 +337,5 @@ pub unsafe extern "C" fn visit_schema( } } - visit_struct_fields(visitor, snapshot.schema()) + visit_struct_fields(visitor, schema) } From 6654cca79663bdc4536da52bcc77d88622756215 Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Thu, 20 Feb 2025 15:07:29 -0800 Subject: [PATCH 10/38] feat: introduce feature flags to select major arrow versions (#654) This change introduces arrow_53 and arrow_54 feature flags on kernel which are _required_ when using default-engine or sync-engine. Fundamentally we must push users of the crate to select their arrow major version through flags since Cargo _will_ include multiple major versions in the dependency tree which can cause ABI breakages when passing around symbols such as `RecordBatch` See #640 --------- Signed-off-by: R. 
Tyler Croy --- .github/workflows/build.yml | 8 +-- .github/workflows/default-kernel-features | 1 + Cargo.toml | 15 ----- acceptance/Cargo.toml | 7 +-- acceptance/src/data.rs | 17 +++--- feature-tests/Cargo.toml | 2 +- ffi/Cargo.toml | 15 +---- ffi/cbindgen.toml | 2 +- ffi/src/engine_data.rs | 18 +++--- integration-tests/Cargo.toml | 17 +----- integration-tests/src/main.rs | 9 +-- integration-tests/test-all-arrow-versions.sh | 33 ++++------- kernel/Cargo.toml | 58 ++++++++----------- kernel/examples/inspect-table/Cargo.toml | 4 +- kernel/examples/read-table-changes/Cargo.toml | 4 +- .../examples/read-table-changes/src/main.rs | 4 +- .../read-table-multi-threaded/Cargo.toml | 3 +- .../read-table-single-threaded/Cargo.toml | 3 +- kernel/src/actions/visitors.rs | 4 +- kernel/src/arrow.rs | 11 ++++ kernel/src/engine/arrow_conversion.rs | 5 +- kernel/src/engine/arrow_data.rs | 12 ++-- kernel/src/engine/arrow_expression.rs | 31 +++++----- kernel/src/engine/arrow_get_data.rs | 2 +- kernel/src/engine/arrow_utils.rs | 41 +++++++------ kernel/src/engine/default/file_stream.rs | 4 +- kernel/src/engine/default/json.rs | 8 +-- kernel/src/engine/default/parquet.rs | 17 +++--- kernel/src/engine/ensure_data_types.rs | 8 +-- .../src/engine/parquet_row_group_skipping.rs | 8 +-- .../parquet_row_group_skipping/tests.rs | 2 +- kernel/src/engine/sync/json.rs | 11 ++-- kernel/src/engine/sync/mod.rs | 2 +- kernel/src/engine/sync/parquet.rs | 4 +- kernel/src/error.rs | 15 +++-- kernel/src/lib.rs | 2 + kernel/src/parquet.rs | 11 ++++ kernel/src/scan/mod.rs | 4 +- kernel/src/transaction.rs | 43 ++++++-------- kernel/tests/cdf.rs | 4 +- kernel/tests/common/mod.rs | 8 +-- kernel/tests/golden_tables.rs | 16 ++--- kernel/tests/read.rs | 7 +-- kernel/tests/write.rs | 32 +++++----- test-utils/Cargo.toml | 5 +- test-utils/src/lib.rs | 8 +-- 46 files changed, 249 insertions(+), 296 deletions(-) create mode 100644 .github/workflows/default-kernel-features create mode 100644 kernel/src/arrow.rs create 
mode 100644 kernel/src/parquet.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8a24dd074..14e6423b8f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,7 +37,7 @@ jobs: cargo install cargo-msrv --locked - name: verify-msrv run: | - cargo msrv --path kernel/ verify --all-features + cargo msrv --path kernel/ verify --features $(cat .github/workflows/default-kernel-features) cargo msrv --path derive-macros/ verify --all-features cargo msrv --path ffi/ verify --all-features cargo msrv --path ffi-proc-macros/ verify --all-features @@ -104,7 +104,7 @@ jobs: - name: check kernel builds with no-default-features run: cargo build -p delta_kernel --no-default-features - name: build and lint with clippy - run: cargo clippy --benches --tests --all-features -- -D warnings + run: cargo clippy --benches --tests --features $(cat .github/workflows/default-kernel-features) -- -D warnings - name: lint without default features run: cargo clippy --no-default-features -- -D warnings - name: check kernel builds with default-engine @@ -129,7 +129,7 @@ jobs: override: true - uses: Swatinem/rust-cache@v2 - name: test - run: cargo test --workspace --verbose --all-features -- --skip read_table_version_hdfs + run: cargo test --workspace --verbose --features $(cat .github/workflows/default-kernel-features) -- --skip read_table_version_hdfs ffi_test: runs-on: ${{ matrix.os }} @@ -229,7 +229,7 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - uses: Swatinem/rust-cache@v2 - name: Generate code coverage - run: cargo llvm-cov --all-features --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs + run: cargo llvm-cov --features $(cat .github/workflows/default-kernel-features) --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 with: diff --git a/.github/workflows/default-kernel-features 
b/.github/workflows/default-kernel-features new file mode 100644 index 0000000000..bee74feefa --- /dev/null +++ b/.github/workflows/default-kernel-features @@ -0,0 +1 @@ +integration-test,default-engine,default-engine-rustls,cloud,arrow,sync-engine diff --git a/Cargo.toml b/Cargo.toml index ec7993736c..aec38fc78b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,21 +23,6 @@ rust-version = "1.80" version = "0.6.1" [workspace.dependencies] -# When changing the arrow version range, also modify ffi/Cargo.toml which has -# its own arrow version ranges witeh modified features. Failure to do so will -# result in compilation errors as two different sets of arrow dependencies may -# be sourced -arrow = { version = ">=53, <55" } -arrow-arith = { version = ">=53, <55" } -arrow-array = { version = ">=53, <55" } -arrow-buffer = { version = ">=53, <55" } -arrow-cast = { version = ">=53, <55" } -arrow-data = { version = ">=53, <55" } -arrow-ord = { version = ">=53, <55" } -arrow-json = { version = ">=53, <55" } -arrow-select = { version = ">=53, <55" } -arrow-schema = { version = ">=53, <55" } -parquet = { version = ">=53, <55", features = ["object_store"] } object_store = { version = ">=0.11, <0.12" } hdfs-native-object-store = "0.12.0" hdfs-native = "0.10.0" diff --git a/acceptance/Cargo.toml b/acceptance/Cargo.toml index 2854c7c393..e844007ef5 100644 --- a/acceptance/Cargo.toml +++ b/acceptance/Cargo.toml @@ -14,19 +14,14 @@ rust-version.workspace = true release = false [dependencies] -arrow-array = { workspace = true } -arrow-cast = { workspace = true } -arrow-ord = { workspace = true } -arrow-select = { workspace = true } -arrow-schema = { workspace = true } delta_kernel = { path = "../kernel", features = [ "default-engine", + "arrow_53", "developer-visibility", ] } futures = "0.3" itertools = "0.13" object_store = { workspace = true } -parquet = { workspace = true } serde = { version = "1", features = ["derive"] } serde_json = "1" thiserror = "1" diff --git 
a/acceptance/src/data.rs b/acceptance/src/data.rs index c515d50c90..b045634b5c 100644 --- a/acceptance/src/data.rs +++ b/acceptance/src/data.rs @@ -1,15 +1,18 @@ use std::{path::Path, sync::Arc}; -use arrow_array::{Array, RecordBatch}; -use arrow_ord::sort::{lexsort_to_indices, SortColumn}; -use arrow_schema::{DataType, Schema}; -use arrow_select::{concat::concat_batches, filter::filter_record_batch, take::take}; +use delta_kernel::arrow::array::{Array, RecordBatch}; +use delta_kernel::arrow::compute::{ + concat_batches, filter_record_batch, lexsort_to_indices, take, SortColumn, +}; +use delta_kernel::arrow::datatypes::{DataType, Schema}; +use delta_kernel::parquet::arrow::async_reader::{ + ParquetObjectReader, ParquetRecordBatchStreamBuilder, +}; use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Engine, Error, Table}; use futures::{stream::TryStreamExt, StreamExt}; use itertools::Itertools; use object_store::{local::LocalFileSystem, ObjectStore}; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use crate::{TestCaseInfo, TestResult}; @@ -83,8 +86,8 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) { fn normalize_col(col: Arc) -> Arc { if let DataType::Timestamp(unit, Some(zone)) = col.data_type() { if **zone == *"+00:00" { - arrow_cast::cast::cast(&col, &DataType::Timestamp(*unit, Some("UTC".into()))) - .expect("Could not cast to UTC") + let data_type = DataType::Timestamp(*unit, Some("UTC".into())); + delta_kernel::arrow::compute::cast(&col, &data_type).expect("Could not cast to UTC") } else { col } diff --git a/feature-tests/Cargo.toml b/feature-tests/Cargo.toml index 7e45e41e27..43f3773a77 100644 --- a/feature-tests/Cargo.toml +++ b/feature-tests/Cargo.toml @@ -12,7 +12,7 @@ version.workspace = true release = false [dependencies] -delta_kernel = { path = "../kernel" } +delta_kernel = { path = "../kernel", features = ["arrow_53"] } [features] default-engine = [ 
"delta_kernel/default-engine" ] diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index aa4edc167f..d588427b00 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -22,21 +22,13 @@ tracing-core = { version = "0.1", optional = true } tracing-subscriber = { version = "0.3", optional = true, features = [ "json" ] } url = "2" delta_kernel = { path = "../kernel", default-features = false, features = [ + "arrow", "developer-visibility", ] } delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.1" } -# used if we use the default engine to be able to move arrow data into the c-ffi format -arrow-schema = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-data = { version = ">=53, <55", default-features = false, features = [ - "ffi", -], optional = true } -arrow-array = { version = ">=53, <55", default-features = false, optional = true } - [build-dependencies] -cbindgen = "0.27.0" +cbindgen = "0.28" libc = "0.2.158" [dev-dependencies] @@ -52,9 +44,6 @@ default = ["default-engine"] cloud = ["delta_kernel/cloud"] default-engine = [ "delta_kernel/default-engine", - "arrow-array", - "arrow-data", - "arrow-schema", ] tracing = [ "tracing-core", "tracing-subscriber" ] sync-engine = ["delta_kernel/sync-engine"] diff --git a/ffi/cbindgen.toml b/ffi/cbindgen.toml index 491333ac13..9f9fe90991 100644 --- a/ffi/cbindgen.toml +++ b/ffi/cbindgen.toml @@ -25,4 +25,4 @@ parse_deps = true # only crates found in this list will ever be parsed. # # default: there is no allow-list (NOTE: this is the opposite of []) -include = ["delta_kernel", "arrow-data", "arrow-schema"] +include = ["arrow", "arrow-data", "arrow-schema", "delta_kernel"] diff --git a/ffi/src/engine_data.rs b/ffi/src/engine_data.rs index 3363c9034b..01eaaa343b 100644 --- a/ffi/src/engine_data.rs +++ b/ffi/src/engine_data.rs @@ -1,5 +1,9 @@ //! 
EngineData related ffi code +use delta_kernel::arrow::array::{ + ffi::{FFI_ArrowArray, FFI_ArrowSchema}, + ArrayData, StructArray, +}; use delta_kernel::{DeltaResult, EngineData}; use std::ffi::c_void; @@ -45,8 +49,8 @@ unsafe fn get_raw_engine_data_impl(data: &mut Handle) -> &m #[cfg(feature = "default-engine")] #[repr(C)] pub struct ArrowFFIData { - pub array: arrow_data::ffi::FFI_ArrowArray, - pub schema: arrow_schema::ffi::FFI_ArrowSchema, + pub array: FFI_ArrowArray, + pub schema: FFI_ArrowSchema, } // TODO: This should use a callback to avoid having to have the engine free the struct @@ -71,16 +75,16 @@ pub unsafe extern "C" fn get_raw_arrow_data( // TODO: This method leaks the returned pointer memory. How will the engine free it? #[cfg(feature = "default-engine")] fn get_raw_arrow_data_impl(data: Box) -> DeltaResult<*mut ArrowFFIData> { - let record_batch: arrow_array::RecordBatch = data + let record_batch: delta_kernel::arrow::array::RecordBatch = data .into_any() .downcast::() .map_err(|_| delta_kernel::Error::EngineDataType("ArrowEngineData".to_string()))? .into(); - let sa: arrow_array::StructArray = record_batch.into(); - let array_data: arrow_data::ArrayData = sa.into(); + let sa: StructArray = record_batch.into(); + let array_data: ArrayData = sa.into(); // these call `clone`. is there a way to not copy anything and what exactly are they cloning? 
- let array = arrow_data::ffi::FFI_ArrowArray::new(&array_data); - let schema = arrow_schema::ffi::FFI_ArrowSchema::try_from(array_data.data_type())?; + let array = FFI_ArrowArray::new(&array_data); + let schema = FFI_ArrowSchema::try_from(array_data.data_type())?; let ret_data = Box::new(ArrowFFIData { array, schema }); Ok(Box::leak(ret_data)) } diff --git a/integration-tests/Cargo.toml b/integration-tests/Cargo.toml index cc0a5abd12..02e924260b 100644 --- a/integration-tests/Cargo.toml +++ b/integration-tests/Cargo.toml @@ -6,19 +6,4 @@ edition = "2021" [workspace] [dependencies] -arrow = "=53.0.0" -delta_kernel = { path = "../kernel", features = ["arrow-conversion", "arrow-expression", "default-engine", "sync-engine"] } - -[patch.'file:///../kernel'] -arrow = "=53.0.0" -arrow-arith = "=53.0.0" -arrow-array = "=53.0.0" -arrow-buffer = "=53.0.0" -arrow-cast = "=53.0.0" -arrow-data = "=53.0.0" -arrow-ord = "=53.0.0" -arrow-json = "=53.0.0" -arrow-select = "=53.0.0" -arrow-schema = "=53.0.0" -parquet = "=53.0.0" -object_store = "=0.11.1" +delta_kernel = { path = "../kernel", features = ["default-engine", "sync-engine"] } diff --git a/integration-tests/src/main.rs b/integration-tests/src/main.rs index 55a809e8ce..db26d0e4de 100644 --- a/integration-tests/src/main.rs +++ b/integration-tests/src/main.rs @@ -1,15 +1,16 @@ -fn create_arrow_schema() -> arrow::datatypes::Schema { - use arrow::datatypes::{DataType, Field, Schema}; +use delta_kernel::arrow::datatypes::{DataType, Field, Schema}; + +fn create_arrow_schema() -> Schema { let field_a = Field::new("a", DataType::Int64, false); let field_b = Field::new("b", DataType::Boolean, false); Schema::new(vec![field_a, field_b]) } fn create_kernel_schema() -> delta_kernel::schema::Schema { - use delta_kernel::schema::{DataType, Schema, StructField}; + use delta_kernel::schema::{DataType, StructField}; let field_a = StructField::not_null("a", DataType::LONG); let field_b = StructField::not_null("b", DataType::BOOLEAN); - 
Schema::new(vec![field_a, field_b]) + delta_kernel::schema::Schema::new(vec![field_a, field_b]) } fn main() { diff --git a/integration-tests/test-all-arrow-versions.sh b/integration-tests/test-all-arrow-versions.sh index 35c8fdc7d6..13fa42618f 100755 --- a/integration-tests/test-all-arrow-versions.sh +++ b/integration-tests/test-all-arrow-versions.sh @@ -2,38 +2,25 @@ set -eu -o pipefail -is_version_le() { - [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] -} - -is_version_lt() { - if [ "$1" = "$2" ] - then - return 1 - else - is_version_le "$1" "$2" - fi -} - test_arrow_version() { ARROW_VERSION="$1" echo "== Testing version $ARROW_VERSION ==" - sed -i'' -e "s/\(arrow[^\"]*=[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml - sed -i'' -e "s/\(parquet[^\"]*\).*/\1\"=$ARROW_VERSION\"/" Cargo.toml cargo clean rm -f Cargo.lock cargo update cat Cargo.toml - cargo run + cargo run --features ${ARROW_VERSION} } -MIN_ARROW_VER="53.0.0" -MAX_ARROW_VER="54.0.0" +FEATURES=$(cat ../kernel/Cargo.toml | grep -e ^arrow_ | awk '{ print $1 }' | sort -u) -for ARROW_VERSION in $(curl -s https://crates.io/api/v1/crates/arrow | jq -r '.versions[].num' | tr -d '\r') + +echo "[features]" >> Cargo.toml + +for ARROW_VERSION in ${FEATURES} do - if ! 
is_version_lt "$ARROW_VERSION" "$MIN_ARROW_VER" && is_version_lt "$ARROW_VERSION" "$MAX_ARROW_VER" - then - test_arrow_version "$ARROW_VERSION" - fi + echo "${ARROW_VERSION} = [\"delta_kernel/${ARROW_VERSION}\"]" >> Cargo.toml + test_arrow_version $ARROW_VERSION done + +git checkout Cargo.toml diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 1431b1ff1f..01446e4717 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -58,20 +58,22 @@ visibility = "0.1.1" # Used in the sync engine tempfile = { version = "3", optional = true } + +# Arrow supported versions +## 53 # Used in default engine -arrow-buffer = { workspace = true, optional = true } -arrow-array = { workspace = true, optional = true, features = ["chrono-tz"] } -arrow-select = { workspace = true, optional = true } -arrow-arith = { workspace = true, optional = true } -arrow-cast = { workspace = true, optional = true } -arrow-json = { workspace = true, optional = true } -arrow-ord = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } +arrow_53 = { package = "arrow", version = "53", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +# Used in default and sync engine +parquet_53 = { package = "parquet", version = "53", features = ["async", "object_store"] , optional = true } +###### +## 54 +arrow_54 = { package = "arrow", version = "54", features = ["chrono-tz", "ffi", "json", "prettyprint"], optional = true } +parquet_54 = { package = "parquet", version = "54", features = ["async", "object_store"] , optional = true } +###### + futures = { version = "0.3", optional = true } object_store = { workspace = true, optional = true } hdfs-native-object-store = { workspace = true, optional = true } -# Used in default and sync engine -parquet = { workspace = true, optional = true } # Used for fetching direct urls (like pre-signed urls) reqwest = { version = "0.12.8", default-features = false, optional = true } strum = { version = "0.26", features = 
["derive"] } @@ -85,14 +87,16 @@ hdfs-native = { workspace = true, optional = true } walkdir = { workspace = true, optional = true } [features] -arrow-conversion = ["arrow-schema"] -arrow-expression = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", -] +# The default version to be expected +arrow = ["arrow_53"] + +arrow_53 = ["dep:arrow_53", "dep:parquet_53"] + +arrow_54 = ["dep:arrow_54", "dep:parquet_54"] + +arrow-conversion = [] +arrow-expression = [] + cloud = [ "object_store/aws", "object_store/azure", @@ -107,16 +111,8 @@ default = [] default-engine-base = [ "arrow-conversion", "arrow-expression", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-json", - "arrow-schema", - "arrow-select", "futures", "object_store", - "parquet/async", - "parquet/object_store", "tokio", "uuid/v4", "uuid/fast-rng", @@ -134,13 +130,6 @@ default-engine-rustls = [ developer-visibility = [] sync-engine = [ - "arrow-cast", - "arrow-conversion", - "arrow-expression", - "arrow-array", - "arrow-json", - "arrow-select", - "parquet", "tempfile", ] integration-test = [ @@ -156,8 +145,7 @@ version = "=0.5.9" rustc_version = "0.4.1" [dev-dependencies] -arrow = { workspace = true, features = ["json", "prettyprint"] } -delta_kernel = { path = ".", features = ["default-engine", "sync-engine"] } +delta_kernel = { path = ".", features = ["arrow", "default-engine", "sync-engine"] } test_utils = { path = "../test-utils" } paste = "1.0" test-log = { version = "0.2", default-features = false, features = ["trace"] } diff --git a/kernel/examples/inspect-table/Cargo.toml b/kernel/examples/inspect-table/Cargo.toml index b81a8ac5bc..4208c69384 100644 --- a/kernel/examples/inspect-table/Cargo.toml +++ b/kernel/examples/inspect-table/Cargo.toml @@ -5,11 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } +arrow = "53" clap = { version = "4.5", features = ["derive"] } delta_kernel = { 
path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "developer-visibility", ] } diff --git a/kernel/examples/read-table-changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml index 181da7dc6b..35f077bc2d 100644 --- a/kernel/examples/read-table-changes/Cargo.toml +++ b/kernel/examples/read-table-changes/Cargo.toml @@ -8,14 +8,12 @@ publish = false release = false [dependencies] -arrow-array = { workspace = true } -arrow-schema = { workspace = true } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow", "default-engine", ] } env_logger = "0.11.3" url = "2" itertools = "0.13" -arrow = { workspace = true, features = ["prettyprint"] } diff --git a/kernel/examples/read-table-changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs index 3360a06cf8..ddafc1554c 100644 --- a/kernel/examples/read-table-changes/src/main.rs +++ b/kernel/examples/read-table-changes/src/main.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, sync::Arc}; -use arrow::{compute::filter_record_batch, util::pretty::print_batches}; -use arrow_array::RecordBatch; use clap::Parser; +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::arrow::{compute::filter_record_batch, util::pretty::print_batches}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; diff --git a/kernel/examples/read-table-multi-threaded/Cargo.toml b/kernel/examples/read-table-multi-threaded/Cargo.toml index 3362e579a9..8cb7c9cd3f 100644 --- a/kernel/examples/read-table-multi-threaded/Cargo.toml +++ b/kernel/examples/read-table-multi-threaded/Cargo.toml @@ -5,10 +5,11 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = 
"4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ "cloud", + "arrow_53", "default-engine", "sync-engine", "developer-visibility", diff --git a/kernel/examples/read-table-single-threaded/Cargo.toml b/kernel/examples/read-table-single-threaded/Cargo.toml index dc04581397..e71959e7bc 100644 --- a/kernel/examples/read-table-single-threaded/Cargo.toml +++ b/kernel/examples/read-table-single-threaded/Cargo.toml @@ -5,9 +5,10 @@ edition = "2021" publish = false [dependencies] -arrow = { workspace = true, features = ["prettyprint", "chrono-tz"] } +arrow = { version = "53", features = ["prettyprint", "chrono-tz"] } clap = { version = "4.5", features = ["derive"] } delta_kernel = { path = "../../../kernel", features = [ + "arrow_53", "cloud", "default-engine", "sync-engine", diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 9f34bd2c5f..72747ac6a1 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -514,8 +514,8 @@ pub(crate) fn visit_deletion_vector_at<'a>( mod tests { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use super::*; use crate::{ diff --git a/kernel/src/arrow.rs b/kernel/src/arrow.rs new file mode 100644 index 0000000000..ccae93013f --- /dev/null +++ b/kernel/src/arrow.rs @@ -0,0 +1,11 @@ +//! This module exists to help re-export the version of arrow used by default-engine and other +//! 
parts of kernel that need arrow + +#[cfg(all(feature = "arrow_53", feature = "arrow_54"))] +compile_error!("Multiple versions of the arrow cannot be used at the same time!"); + +#[cfg(feature = "arrow_53")] +pub use arrow_53::*; + +#[cfg(feature = "arrow_54")] +pub use arrow_54::*; diff --git a/kernel/src/engine/arrow_conversion.rs b/kernel/src/engine/arrow_conversion.rs index 0b905ff3aa..a425cd1437 100644 --- a/kernel/src/engine/arrow_conversion.rs +++ b/kernel/src/engine/arrow_conversion.rs @@ -2,10 +2,11 @@ use std::sync::Arc; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit, }; +use crate::arrow::error::ArrowError; use itertools::Itertools; use crate::error::Error; diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index 000d623288..9883809013 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -2,12 +2,12 @@ use crate::engine_data::{EngineData, EngineList, EngineMap, GetData, RowVisitor} use crate::schema::{ColumnName, DataType}; use crate::{DeltaResult, Error}; -use arrow_array::cast::AsArray; -use arrow_array::types::{Int32Type, Int64Type}; -use arrow_array::{ +use crate::arrow::array::cast::AsArray; +use crate::arrow::array::types::{Int32Type, Int64Type}; +use crate::arrow::array::{ Array, ArrayRef, GenericListArray, MapArray, OffsetSizeTrait, RecordBatch, StructArray, }; -use arrow_schema::{DataType as ArrowDataType, FieldRef}; +use crate::arrow::datatypes::{DataType as ArrowDataType, FieldRef}; use tracing::debug; use std::collections::{HashMap, HashSet}; @@ -296,8 +296,8 @@ impl ArrowEngineData { mod tests { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::{RecordBatch, StringArray}; + use 
crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use crate::{ actions::{get_log_schema, Metadata, Protocol}, diff --git a/kernel/src/engine/arrow_expression.rs b/kernel/src/engine/arrow_expression.rs index 8ee54ebd0e..b7a845171e 100644 --- a/kernel/src/engine/arrow_expression.rs +++ b/kernel/src/engine/arrow_expression.rs @@ -3,23 +3,24 @@ use std::borrow::Borrow; use std::collections::HashMap; use std::sync::Arc; -use arrow_arith::boolean::{and_kleene, is_null, not, or_kleene}; -use arrow_arith::numeric::{add, div, mul, sub}; -use arrow_array::cast::AsArray; -use arrow_array::{types::*, MapArray}; -use arrow_array::{ +use crate::arrow::array::AsArray; +use crate::arrow::array::{types::*, MapArray}; +use crate::arrow::array::{ Array, ArrayRef, BinaryArray, BooleanArray, Date32Array, Datum, Decimal128Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, ListArray, RecordBatch, StringArray, StructArray, TimestampMicrosecondArray, }; -use arrow_buffer::OffsetBuffer; -use arrow_ord::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; -use arrow_ord::comparison::in_list_utf8; -use arrow_schema::{ - ArrowError, DataType as ArrowDataType, Field as ArrowField, Fields, IntervalUnit, - Schema as ArrowSchema, TimeUnit, +use crate::arrow::buffer::OffsetBuffer; +use crate::arrow::compute::concat; +use crate::arrow::compute::kernels::cmp::{distinct, eq, gt, gt_eq, lt, lt_eq, neq}; +use crate::arrow::compute::kernels::comparison::in_list_utf8; +use crate::arrow::compute::kernels::numeric::{add, div, mul, sub}; +use crate::arrow::compute::{and_kleene, is_null, not, or_kleene}; +use crate::arrow::datatypes::{ + DataType as ArrowDataType, Field as ArrowField, Fields, IntervalUnit, Schema as ArrowSchema, + TimeUnit, }; -use arrow_select::concat::concat; +use crate::arrow::error::ArrowError; use itertools::Itertools; use super::arrow_conversion::LIST_ARRAY_ROOT; @@ -568,9 +569,9 @@ impl ExpressionEvaluator for DefaultExpressionEvaluator { mod 
tests { use std::ops::{Add, Div, Mul, Sub}; - use arrow_array::{GenericStringArray, Int32Array}; - use arrow_buffer::ScalarBuffer; - use arrow_schema::{DataType, Field, Fields, Schema}; + use crate::arrow::array::{GenericStringArray, Int32Array}; + use crate::arrow::buffer::ScalarBuffer; + use crate::arrow::datatypes::{DataType, Field, Fields, Schema}; use super::*; use crate::expressions::*; diff --git a/kernel/src/engine/arrow_get_data.rs b/kernel/src/engine/arrow_get_data.rs index 145aab66bb..fbed64df10 100644 --- a/kernel/src/engine/arrow_get_data.rs +++ b/kernel/src/engine/arrow_get_data.rs @@ -1,4 +1,4 @@ -use arrow_array::{ +use crate::arrow::array::{ types::{GenericStringType, Int32Type, Int64Type}, Array, BooleanArray, GenericByteArray, GenericListArray, MapArray, OffsetSizeTrait, PrimitiveArray, diff --git a/kernel/src/engine/arrow_utils.rs b/kernel/src/engine/arrow_utils.rs index e16303cf3c..749f1399c9 100644 --- a/kernel/src/engine/arrow_utils.rs +++ b/kernel/src/engine/arrow_utils.rs @@ -12,19 +12,19 @@ use crate::{ DeltaResult, EngineData, Error, }; -use arrow_array::{ +use crate::arrow::array::{ cast::AsArray, make_array, new_null_array, Array as ArrowArray, GenericListArray, OffsetSizeTrait, RecordBatch, StringArray, StructArray, }; -use arrow_buffer::NullBuffer; -use arrow_json::{LineDelimitedWriter, ReaderBuilder}; -use arrow_schema::{ +use crate::arrow::buffer::NullBuffer; +use crate::arrow::compute::concat_batches; +use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, FieldRef as ArrowFieldRef, Fields, SchemaRef as ArrowSchemaRef, }; -use arrow_select::concat::concat_batches; +use crate::arrow::json::{LineDelimitedWriter, ReaderBuilder}; +use crate::parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use itertools::Itertools; -use parquet::{arrow::ProjectionMask, schema::types::SchemaDescriptor}; use tracing::debug; macro_rules! prim_array_cmp { @@ -41,7 +41,7 @@ macro_rules! 
prim_array_cmp { .ok_or(Error::invalid_expression( format!("Cannot cast to list array: {}", $right_arr.data_type())) )?; - arrow_ord::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) + crate::arrow::compute::kernels::comparison::in_list(prim_array, list_array).map(wrap_comparison_result) } )+ _ => Err(ArrowError::CastError( @@ -60,7 +60,10 @@ pub(crate) use prim_array_cmp; /// returns a tuples of (mask_indices: Vec, reorder_indices: /// Vec). `mask_indices` is used for generating the mask for reading from the pub(crate) fn make_arrow_error(s: impl Into) -> Error { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s.into())).with_backtrace() + Error::Arrow(crate::arrow::error::ArrowError::InvalidArgumentError( + s.into(), + )) + .with_backtrace() } /// Applies post-processing to data read from parquet files. This includes `reorder_struct_array` to @@ -516,7 +519,7 @@ pub(crate) fn reorder_struct_array( match &reorder_index.transform { ReorderIndexTransform::Cast(target) => { let col = input_cols[parquet_position].as_ref(); - let col = Arc::new(arrow_cast::cast::cast(col, target)?); + let col = Arc::new(crate::arrow::compute::cast(col, target)?); let new_field = Arc::new( input_fields[parquet_position] .as_ref() @@ -742,17 +745,17 @@ pub(crate) fn to_json_bytes( mod tests { use std::sync::Arc; - use arrow::{ - array::AsArray, - buffer::{OffsetBuffer, ScalarBuffer}, - }; - use arrow_array::{ + use crate::arrow::array::{ Array, ArrayRef as ArrowArrayRef, BooleanArray, GenericListArray, Int32Array, StructArray, }; - use arrow_schema::{ + use crate::arrow::datatypes::{ DataType as ArrowDataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; + use crate::arrow::{ + array::AsArray, + buffer::{OffsetBuffer, ScalarBuffer}, + }; use crate::schema::{ArrayType, DataType, MapType, StructField, StructType}; @@ -1498,9 +1501,9 @@ mod tests { #[test] fn test_arrow_broken_nested_null_masks() { + use 
crate::arrow::datatypes::{DataType, Field, Fields, Schema}; use crate::engine::arrow_utils::fix_nested_null_masks; - use arrow::datatypes::{DataType, Field, Fields, Schema}; - use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + use crate::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; // Parse some JSON into a nested schema let schema = Arc::new(Schema::new(vec![Field::new( @@ -1532,7 +1535,7 @@ mod tests { { "outer" : { "inner_non_null" : { "leaf_non_null" : 4 }, "inner_nullable" : { "leaf_non_null" : 5 } } } { "outer" : { "inner_non_null" : { "leaf_non_null" : 6 }, "inner_nullable" : { "leaf_non_null" : 7, "leaf_nullable": 8 } } } "#; - let batch1 = arrow::json::ReaderBuilder::new(schema.clone()) + let batch1 = crate::arrow::json::ReaderBuilder::new(schema.clone()) .build(json_string.as_bytes()) .unwrap() .next() @@ -1567,7 +1570,7 @@ mod tests { // Write the batch to a parquet file and read it back let mut buffer = vec![]; let mut writer = - parquet::arrow::ArrowWriter::try_new(&mut buffer, schema.clone(), None).unwrap(); + crate::parquet::arrow::ArrowWriter::try_new(&mut buffer, schema.clone(), None).unwrap(); writer.write(&batch1).unwrap(); writer.close().unwrap(); // writer must be closed to write footer let batch2 = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer)) diff --git a/kernel/src/engine/default/file_stream.rs b/kernel/src/engine/default/file_stream.rs index 075716a755..bcdc370a01 100644 --- a/kernel/src/engine/default/file_stream.rs +++ b/kernel/src/engine/default/file_stream.rs @@ -5,8 +5,8 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{ready, Context, Poll}; -use arrow_array::RecordBatch; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::array::RecordBatch; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; use futures::future::BoxFuture; use futures::stream::{BoxStream, Stream, StreamExt}; use futures::FutureExt; diff --git a/kernel/src/engine/default/json.rs 
b/kernel/src/engine/default/json.rs index a5608fcfc1..bef3b30cd4 100644 --- a/kernel/src/engine/default/json.rs +++ b/kernel/src/engine/default/json.rs @@ -5,8 +5,8 @@ use std::ops::Range; use std::sync::Arc; use std::task::{ready, Poll}; -use arrow_json::ReaderBuilder; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; use bytes::{Buf, Bytes}; use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; @@ -201,8 +201,8 @@ impl FileOpener for JsonOpener { mod tests { use std::path::PathBuf; - use arrow::array::{AsArray, RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::{AsArray, RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use itertools::Itertools; use object_store::{local::LocalFileSystem, ObjectStore}; diff --git a/kernel/src/engine/default/parquet.rs b/kernel/src/engine/default/parquet.rs index 50c816d3b4..f344ccd86c 100644 --- a/kernel/src/engine/default/parquet.rs +++ b/kernel/src/engine/default/parquet.rs @@ -4,16 +4,16 @@ use std::collections::HashMap; use std::ops::Range; use std::sync::Arc; -use arrow_array::builder::{MapBuilder, MapFieldNames, StringBuilder}; -use arrow_array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::arrow::array::builder::{MapBuilder, MapFieldNames, StringBuilder}; +use crate::arrow::array::{BooleanArray, Int64Array, RecordBatch, StringArray}; +use crate::parquet::arrow::arrow_reader::{ + ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder, +}; +use crate::parquet::arrow::arrow_writer::ArrowWriter; +use crate::parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use futures::StreamExt; use object_store::path::Path; use object_store::DynObjectStore; -use parquet::arrow::arrow_reader::{ - ArrowReaderMetadata, ArrowReaderOptions, 
ParquetRecordBatchReaderBuilder, -}; -use parquet::arrow::arrow_writer::ArrowWriter; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; use uuid::Uuid; use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; @@ -361,8 +361,7 @@ mod tests { use std::path::PathBuf; use std::time::{SystemTime, UNIX_EPOCH}; - use arrow_array::array::Array; - use arrow_array::RecordBatch; + use crate::arrow::array::{Array, RecordBatch}; use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; use url::Url; diff --git a/kernel/src/engine/ensure_data_types.rs b/kernel/src/engine/ensure_data_types.rs index b6f1866719..da699be07b 100644 --- a/kernel/src/engine/ensure_data_types.rs +++ b/kernel/src/engine/ensure_data_types.rs @@ -5,7 +5,7 @@ use std::{ ops::Deref, }; -use arrow_schema::{DataType as ArrowDataType, Field as ArrowField}; +use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField}; use itertools::Itertools; use crate::{ @@ -256,7 +256,7 @@ fn metadata_eq( #[cfg(test)] mod tests { - use arrow_schema::{DataType as ArrowDataType, Field as ArrowField, Fields}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field as ArrowField, Fields}; use crate::{ engine::ensure_data_types::ensure_data_types, @@ -276,8 +276,8 @@ mod tests { assert!(can_upcast_to_decimal(&Decimal128(5, 1), 6u8, 2i8)); assert!(can_upcast_to_decimal( &Decimal128(10, 5), - arrow_schema::DECIMAL128_MAX_PRECISION, - arrow_schema::DECIMAL128_MAX_SCALE - 5 + crate::arrow::datatypes::DECIMAL128_MAX_PRECISION, + crate::arrow::datatypes::DECIMAL128_MAX_SCALE - 5 )); assert!(can_upcast_to_decimal(&Int8, 3u8, 0i8)); diff --git a/kernel/src/engine/parquet_row_group_skipping.rs b/kernel/src/engine/parquet_row_group_skipping.rs index 79c87d9234..fbce2f9138 100644 --- a/kernel/src/engine/parquet_row_group_skipping.rs +++ b/kernel/src/engine/parquet_row_group_skipping.rs @@ -2,13 +2,13 @@ use crate::expressions::{ BinaryExpression, 
ColumnName, Expression, Scalar, UnaryExpression, VariadicExpression, }; +use crate::parquet::arrow::arrow_reader::ArrowReaderBuilder; +use crate::parquet::file::metadata::RowGroupMetaData; +use crate::parquet::file::statistics::Statistics; +use crate::parquet::schema::types::ColumnDescPtr; use crate::predicates::parquet_stats_skipping::ParquetStatsProvider; use crate::schema::{DataType, PrimitiveType}; use chrono::{DateTime, Days}; -use parquet::arrow::arrow_reader::ArrowReaderBuilder; -use parquet::file::metadata::RowGroupMetaData; -use parquet::file::statistics::Statistics; -use parquet::schema::types::ColumnDescPtr; use std::collections::{HashMap, HashSet}; use tracing::debug; diff --git a/kernel/src/engine/parquet_row_group_skipping/tests.rs b/kernel/src/engine/parquet_row_group_skipping/tests.rs index 37a3bb1b04..3f3bb8108b 100644 --- a/kernel/src/engine/parquet_row_group_skipping/tests.rs +++ b/kernel/src/engine/parquet_row_group_skipping/tests.rs @@ -1,8 +1,8 @@ use super::*; use crate::expressions::{column_expr, column_name}; +use crate::parquet::arrow::arrow_reader::ArrowReaderMetadata; use crate::predicates::DataSkippingPredicateEvaluator as _; use crate::Expression; -use parquet::arrow::arrow_reader::ArrowReaderMetadata; use std::fs::File; /// Performs an exhaustive set of reads against a specially crafted parquet file. 
diff --git a/kernel/src/engine/sync/json.rs b/kernel/src/engine/sync/json.rs index 04078653d0..f2212cb816 100644 --- a/kernel/src/engine/sync/json.rs +++ b/kernel/src/engine/sync/json.rs @@ -1,6 +1,7 @@ use std::{fs::File, io::BufReader, io::Write}; -use arrow_schema::SchemaRef as ArrowSchemaRef; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::arrow::json::ReaderBuilder; use tempfile::NamedTempFile; use url::Url; @@ -22,7 +23,7 @@ fn try_create_from_json( arrow_schema: ArrowSchemaRef, _predicate: Option, ) -> DeltaResult>> { - let json = arrow_json::ReaderBuilder::new(arrow_schema) + let json = ReaderBuilder::new(arrow_schema) .build(BufReader::new(file))? .map(|data| Ok(ArrowEngineData::new(data?))); Ok(json) @@ -96,10 +97,8 @@ mod tests { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::DataType as ArrowDataType; - use arrow_schema::Field; - use arrow_schema::Schema as ArrowSchema; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use serde_json::json; use url::Url; diff --git a/kernel/src/engine/sync/mod.rs b/kernel/src/engine/sync/mod.rs index 5ab95c1b15..e4e00982cb 100644 --- a/kernel/src/engine/sync/mod.rs +++ b/kernel/src/engine/sync/mod.rs @@ -7,7 +7,7 @@ use crate::{ FileMeta, FileSystemClient, JsonHandler, ParquetHandler, SchemaRef, }; -use arrow_schema::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; +use crate::arrow::datatypes::{Schema as ArrowSchema, SchemaRef as ArrowSchemaRef}; use itertools::Itertools; use std::fs::File; use std::sync::Arc; diff --git a/kernel/src/engine/sync/parquet.rs b/kernel/src/engine/sync/parquet.rs index 8714c694f9..48010af308 100644 --- a/kernel/src/engine/sync/parquet.rs +++ b/kernel/src/engine/sync/parquet.rs @@ -1,7 +1,7 @@ use std::fs::File; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use parquet::arrow::arrow_reader::{ArrowReaderMetadata, 
ParquetRecordBatchReaderBuilder}; +use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; +use crate::parquet::arrow::arrow_reader::{ArrowReaderMetadata, ParquetRecordBatchReaderBuilder}; use super::read_files; use crate::engine::arrow_data::ArrowEngineData; diff --git a/kernel/src/error.rs b/kernel/src/error.rs index 815ef3e512..91e42821db 100644 --- a/kernel/src/error.rs +++ b/kernel/src/error.rs @@ -10,6 +10,9 @@ use crate::schema::{DataType, StructType}; use crate::table_properties::ParseIntervalError; use crate::Version; +#[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] +use crate::arrow::error::ArrowError; + /// A [`std::result::Result`] that has the kernel [`Error`] as the error variant pub type DeltaResult = std::result::Result; @@ -29,7 +32,7 @@ pub enum Error { /// An error performing operations on arrow data #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error(transparent)] - Arrow(arrow_schema::ArrowError), + Arrow(ArrowError), /// User tried to convert engine data to the wrong type #[error("Invalid engine data type. Could not convert to {0}")] @@ -58,10 +61,10 @@ pub enum Error { #[error("Internal error {0}. 
This is a kernel bug, please report.")] InternalError(String), - /// An error encountered while working with parquet data - #[cfg(feature = "parquet")] + /// An error encountered while working with parquet data + #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] #[error("Arrow error: {0}")] - Parquet(#[from] parquet::errors::ParquetError), + Parquet(#[from] crate::parquet::errors::ParquetError), /// An error interacting with the object_store crate // We don't use [#from] object_store::Error here as our From impl transforms @@ -304,8 +307,8 @@ from_with_backtrace!( ); #[cfg(any(feature = "default-engine-base", feature = "sync-engine"))] -impl From for Error { - fn from(value: arrow_schema::ArrowError) -> Self { +impl From for Error { + fn from(value: ArrowError) -> Self { Self::Arrow(value).with_backtrace() } } diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index 413aaad93c..2e46986582 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -87,6 +87,8 @@ pub mod table_features; pub mod table_properties; pub mod transaction; +pub mod arrow; +pub mod parquet; pub(crate) mod predicates; pub(crate) mod utils; diff --git a/kernel/src/parquet.rs b/kernel/src/parquet.rs new file mode 100644 index 0000000000..bc7eba68f5 --- /dev/null +++ b/kernel/src/parquet.rs @@ -0,0 +1,11 @@ +//! This module exists to help re-export the version of parquet used by default-engine and other +//! 
parts of kernel that need parquet + +#[cfg(all(feature = "arrow_53", feature = "arrow_54"))] +compile_error!("Multiple versions of the arrow cannot be used at the same time!"); + +#[cfg(feature = "arrow_53")] +pub use parquet_53::*; + +#[cfg(feature = "arrow_54")] +pub use parquet_54::*; diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 14e2ee50ff..0672345eb5 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -665,8 +665,8 @@ pub fn selection_vector( pub(crate) mod test_utils { use std::sync::Arc; - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; use crate::{ actions::get_log_schema, diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index d74c2456a5..4905668a46 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -339,11 +339,11 @@ mod tests { use crate::schema::MapType; use crate::{ExpressionHandler, FileSystemClient, JsonHandler, ParquetHandler}; - use arrow::json::writer::LineDelimitedWriter; - use arrow::record_batch::RecordBatch; - use arrow_array::builder::StringBuilder; - use arrow_schema::Schema as ArrowSchema; - use arrow_schema::{DataType as ArrowDataType, Field}; + use crate::arrow::array::{MapArray, MapBuilder, MapFieldNames, StringArray, StringBuilder}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; + use crate::arrow::error::ArrowError; + use crate::arrow::json::writer::LineDelimitedWriter; + use crate::arrow::record_batch::RecordBatch; struct ExprEngine(Arc); @@ -371,16 +371,15 @@ mod tests { } } - fn build_map(entries: Vec<(&str, &str)>) -> arrow_array::MapArray { + fn build_map(entries: Vec<(&str, &str)>) -> MapArray { let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let 
names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); for (key, val) in entries { builder.keys().append_value(key); builder.values().append_value(val); @@ -494,7 +493,7 @@ mod tests { engine_commit_info_schema, vec![ Arc::new(map_array), - Arc::new(arrow_array::StringArray::from(vec!["some_string"])), + Arc::new(StringArray::from(vec!["some_string"])), ], )?; @@ -533,7 +532,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -542,12 +541,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) => (), + Error::Arrow(ArrowError::SchemaError(_)) => (), Error::Backtraced { source, .. } - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::SchemaError(_))) => {} _ => panic!("expected arrow schema error error, got {:?}", e), }); @@ -564,7 +560,7 @@ mod tests { )])); let commit_info_batch = RecordBatch::try_new( engine_commit_info_schema, - vec![Arc::new(arrow_array::StringArray::new_null(1))], + vec![Arc::new(StringArray::new_null(1))], )?; let _ = generate_commit_info( @@ -573,12 +569,9 @@ mod tests { &ArrowEngineData::new(commit_info_batch), ) .map_err(|e| match e { - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) => (), + Error::Arrow(ArrowError::InvalidArgumentError(_)) => (), Error::Backtraced { source, .. 
} - if matches!( - &*source, - Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(_)) - ) => {} + if matches!(&*source, Error::Arrow(ArrowError::InvalidArgumentError(_))) => {} _ => panic!("expected arrow invalid arg error, got {:?}", e), }); @@ -644,16 +637,16 @@ mod tests { ), true, )])); - use arrow_array::builder::StringBuilder; + let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = crate::arrow::array::MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; let mut builder = - arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + crate::arrow::array::MapBuilder::new(Some(names), key_builder, val_builder); builder.append(is_null).unwrap(); let array = builder.finish(); diff --git a/kernel/tests/cdf.rs b/kernel/tests/cdf.rs index 2560dc71d8..0690189515 100644 --- a/kernel/tests/cdf.rs +++ b/kernel/tests/cdf.rs @@ -1,7 +1,7 @@ use std::{error, sync::Arc}; -use arrow::compute::filter_record_batch; -use arrow_array::RecordBatch; +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::arrow::compute::filter_record_batch; use delta_kernel::engine::sync::SyncEngine; use itertools::Itertools; diff --git a/kernel/tests/common/mod.rs b/kernel/tests/common/mod.rs index a918695b74..4268f0626d 100644 --- a/kernel/tests/common/mod.rs +++ b/kernel/tests/common/mod.rs @@ -1,6 +1,6 @@ -use arrow::compute::filter_record_batch; -use arrow::record_batch::RecordBatch; -use arrow::util::pretty::pretty_format_batches; +use delta_kernel::arrow::compute::filter_record_batch; +use delta_kernel::arrow::record_batch::RecordBatch; +use delta_kernel::arrow::util::pretty::pretty_format_batches; use itertools::Itertools; use crate::ArrowEngineData; @@ -24,7 +24,7 @@ macro_rules! sort_lines { #[macro_export] macro_rules! 
assert_batches_sorted_eq { ($expected_lines_sorted: expr, $CHUNKS: expr) => { - let formatted = arrow::util::pretty::pretty_format_batches($CHUNKS) + let formatted = delta_kernel::arrow::util::pretty::pretty_format_batches($CHUNKS) .unwrap() .to_string(); // fix for windows: \r\n --> diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs index 120271ef2d..2b1bc1a71a 100644 --- a/kernel/tests/golden_tables.rs +++ b/kernel/tests/golden_tables.rs @@ -3,23 +3,23 @@ //! Data (golden tables) are stored in tests/golden_data/.tar.zst //! Each table directory has a table/ and expected/ subdirectory with the input/output respectively -use arrow::array::AsArray; -use arrow::{compute::filter_record_batch, record_batch::RecordBatch}; -use arrow_ord::sort::{lexsort_to_indices, SortColumn}; -use arrow_schema::{FieldRef, Schema}; -use arrow_select::{concat::concat_batches, take::take}; +use delta_kernel::arrow::array::{Array, AsArray, StructArray}; +use delta_kernel::arrow::compute::{concat_batches, take}; +use delta_kernel::arrow::compute::{lexsort_to_indices, SortColumn}; +use delta_kernel::arrow::datatypes::{DataType, FieldRef, Schema}; +use delta_kernel::arrow::{compute::filter_record_batch, record_batch::RecordBatch}; use itertools::Itertools; use paste::paste; use std::path::{Path, PathBuf}; use std::sync::Arc; +use delta_kernel::parquet::arrow::async_reader::{ + ParquetObjectReader, ParquetRecordBatchStreamBuilder, +}; use delta_kernel::{engine::arrow_data::ArrowEngineData, DeltaResult, Table}; use futures::{stream::TryStreamExt, StreamExt}; use object_store::{local::LocalFileSystem, ObjectStore}; -use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; -use arrow_array::{Array, StructArray}; -use arrow_schema::DataType; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 
3f89875430..b5b0849d35 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -3,21 +3,20 @@ use std::ops::Not; use std::path::PathBuf; use std::sync::Arc; -use arrow::compute::filter_record_batch; -use arrow_schema::SchemaRef as ArrowSchemaRef; -use arrow_select::concat::concat_batches; use delta_kernel::actions::deletion_vector::split_vector; +use delta_kernel::arrow::compute::{concat_batches, filter_record_batch}; +use delta_kernel::arrow::datatypes::SchemaRef as ArrowSchemaRef; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::TokioBackgroundExecutor; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::expressions::{column_expr, BinaryOperator, Expression, ExpressionRef}; +use delta_kernel::parquet::file::properties::{EnabledStatistics, WriterProperties}; use delta_kernel::scan::state::{transform_to_logical, visit_scan_files, DvInfo, Stats}; use delta_kernel::scan::Scan; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::{Engine, FileMeta, Table}; use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; -use parquet::file::properties::{EnabledStatistics, WriterProperties}; use test_utils::{ actions_to_string, add_commit, generate_batch, generate_simple_batch, into_record_batch, record_batch_to_bytes, record_batch_to_bytes_with_props, IntoArray, TestAction, METADATA, diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index 6335be7dd8..3a62046e7b 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -1,10 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; -use arrow::array::{Int32Array, StringArray}; -use arrow::record_batch::RecordBatch; -use arrow_schema::Schema as ArrowSchema; -use arrow_schema::{DataType as ArrowDataType, Field}; +use delta_kernel::arrow::array::{ + Int32Array, MapBuilder, MapFieldNames, StringArray, StringBuilder, +}; +use delta_kernel::arrow::datatypes::{DataType as ArrowDataType, Field, 
Schema as ArrowSchema}; +use delta_kernel::arrow::error::ArrowError; +use delta_kernel::arrow::record_batch::RecordBatch; use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::memory::InMemory; @@ -120,15 +122,14 @@ fn new_commit_info() -> DeltaResult> { false, )])); - use arrow_array::builder::StringBuilder; let key_builder = StringBuilder::new(); let val_builder = StringBuilder::new(); - let names = arrow_array::builder::MapFieldNames { + let names = MapFieldNames { entry: "entries".to_string(), key: "key".to_string(), value: "value".to_string(), }; - let mut builder = arrow_array::builder::MapBuilder::new(Some(names), key_builder, val_builder); + let mut builder = MapBuilder::new(Some(names), key_builder, val_builder); builder.keys().append_value("engineInfo"); builder.values().append_value("default engine"); builder.append(true).unwrap(); @@ -349,7 +350,7 @@ async fn test_append() -> Result<(), Box> { let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { let data = RecordBatch::try_new( Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], + vec![Arc::new(Int32Array::from(data.to_vec()))], )?; Ok(Box::new(ArrowEngineData::new(data))) }); @@ -441,9 +442,7 @@ async fn test_append() -> Result<(), Box> { test_read( &ArrowEngineData::new(RecordBatch::try_new( Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(vec![ - 1, 2, 3, 4, 5, 6, - ]))], + vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]))], )?), &table, engine, @@ -487,7 +486,7 @@ async fn test_append_partitioned() -> Result<(), Box> { let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { let data = RecordBatch::try_new( Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::Int32Array::from(data.to_vec()))], + vec![Arc::new(Int32Array::from(data.to_vec()))], )?; Ok(Box::new(ArrowEngineData::new(data))) }); @@ -627,7 +626,7 @@ async fn 
test_append_invalid_schema() -> Result<(), Box> let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { let data = RecordBatch::try_new( Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(arrow::array::StringArray::from(data.to_vec()))], + vec![Arc::new(StringArray::from(data.to_vec()))], )?; Ok(Box::new(ArrowEngineData::new(data))) }); @@ -653,12 +652,9 @@ async fn test_append_invalid_schema() -> Result<(), Box> let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); assert!(write_metadata.all(|res| match res { - Err(KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_))) => true, + Err(KernelError::Arrow(ArrowError::SchemaError(_))) => true, Err(KernelError::Backtraced { source, .. }) - if matches!( - &*source, - KernelError::Arrow(arrow_schema::ArrowError::SchemaError(_)) - ) => + if matches!(&*source, KernelError::Arrow(ArrowError::SchemaError(_))) => true, _ => false, })); diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 0a90e96ede..b602b2e68d 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -12,9 +12,6 @@ version.workspace = true release = false [dependencies] -arrow-array = { workspace = true, features = ["chrono-tz"] } -arrow-schema = { workspace = true } -delta_kernel = { path = "../kernel", features = [ "default-engine" ] } +delta_kernel = { path = "../kernel", features = [ "default-engine" ] } itertools = "0.13.0" object_store = { workspace = true } -parquet = { workspace = true } diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index 0aeee887d7..e8747c539a 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -2,14 +2,14 @@ use std::sync::Arc; -use arrow_array::{ArrayRef, Int32Array, RecordBatch, StringArray}; -use arrow_schema::ArrowError; +use delta_kernel::arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use delta_kernel::arrow::error::ArrowError; use delta_kernel::engine::arrow_data::ArrowEngineData; +use 
delta_kernel::parquet::arrow::arrow_writer::ArrowWriter; +use delta_kernel::parquet::file::properties::WriterProperties; use delta_kernel::EngineData; use itertools::Itertools; use object_store::{path::Path, ObjectStore}; -use parquet::arrow::arrow_writer::ArrowWriter; -use parquet::file::properties::WriterProperties; /// A common useful initial metadata and protocol. Also includes a single commitInfo pub const METADATA: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} From b07fc6d77614ddea82806cfd37d0792aae012644 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 20 Feb 2025 15:56:44 -0800 Subject: [PATCH 11/38] feat: Support writing to not only 3/7 protocol (#693) Our previous write protocol check was too strict. Now we just ensure that the protocol makes sense given what features are present/specified. Made all existing `write.rs` tests also write to a protocol 1/1 table, and they all work. --- kernel/src/actions/mod.rs | 27 +- kernel/tests/write.rs | 778 ++++++++++++++++++++------------------ 2 files changed, 429 insertions(+), 376 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 8bcb5df505..cb0a9c9cda 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -276,17 +276,26 @@ impl Protocol { /// support the specified protocol writer version and all enabled writer features? 
pub fn ensure_write_supported(&self) -> DeltaResult<()> { match &self.writer_features { - // if min_reader_version = 3 and min_writer_version = 7 and all writer features are - // supported => OK - Some(writer_features) - if self.min_reader_version == 3 && self.min_writer_version == 7 => - { + Some(writer_features) if self.min_writer_version == 7 => { + // if we're on version 7, make sure we support all the specified features ensure_supported_features(writer_features, &SUPPORTED_WRITER_FEATURES) } - // otherwise not supported - _ => Err(Error::unsupported( - "Only tables with min reader version 3 and min writer version 7 with no table features are supported." - )), + Some(_) => { + // there are features, but we're not on 7, so the protocol is actually broken + Err(Error::unsupported( + "Tables with min writer version != 7 should not have table features.", + )) + } + None => { + // no features, we currently only support version 1 in this case + require!( + self.min_writer_version == 1, + Error::unsupported( + "Currently delta-kernel-rs can only write to tables with protocol.minWriterVersion = 1 or 7" + ) + ); + Ok(()) + } } } } diff --git a/kernel/tests/write.rs b/kernel/tests/write.rs index 3a62046e7b..eb3671595b 100644 --- a/kernel/tests/write.rs +++ b/kernel/tests/write.rs @@ -60,18 +60,28 @@ async fn create_table( table_path: Url, schema: SchemaRef, partition_columns: &[&str], + use_37_protocol: bool, ) -> Result> { let table_id = "test_id"; let schema = serde_json::to_string(&schema)?; - let protocol = json!({ - "protocol": { - "minReaderVersion": 3, - "minWriterVersion": 7, - "readerFeatures": [], - "writerFeatures": [] - } - }); + let protocol = if use_37_protocol { + json!({ + "protocol": { + "minReaderVersion": 3, + "minWriterVersion": 7, + "readerFeatures": [], + "writerFeatures": [] + } + }) + } else { + json!({ + "protocol": { + "minReaderVersion": 1, + "minWriterVersion": 1, + } + }) + }; let metadata = json!({ "metaData": { "id": table_id, @@ -140,56 
+150,99 @@ fn new_commit_info() -> DeltaResult> { Ok(Box::new(ArrowEngineData::new(commit_info_batch))) } +async fn setup_tables( + schema: SchemaRef, + partition_columns: &[&str], +) -> Result< + Vec<( + Table, + DefaultEngine, + Arc, + &'static str, + )>, + Box, +> { + let (store_37, engine_37, table_location_37) = setup("test_table_37", true); + let (store_11, engine_11, table_location_11) = setup("test_table_11", true); + Ok(vec![ + ( + create_table( + store_37.clone(), + table_location_37, + schema.clone(), + partition_columns, + true, + ) + .await?, + engine_37, + store_37, + "test_table_37", + ), + ( + create_table( + store_11.clone(), + table_location_11, + schema, + partition_columns, + false, + ) + .await?, + engine_11, + store_11, + "test_table_11", + ), + ]) +} + #[tokio::test] async fn test_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - let commit_info = new_commit_info()?; - - // create a transaction - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); - - // commit! 
- txn.commit(&engine)?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; - *parsed_commit - .get_mut("commitInfo") - .unwrap() - .get_mut("timestamp") - .unwrap() = serde_json::Value::Number(0.into()); - - let expected_commit = json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + + for (table, engine, store, table_name) in setup_tables(schema, &[]).await? { + let commit_info = new_commit_info()?; + + // create a transaction + let txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // commit! + txn.commit(&engine)?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commit: serde_json::Value = serde_json::from_slice(&commit1.bytes().await?)?; + *parsed_commit + .get_mut("commitInfo") + .unwrap() + .get_mut("timestamp") + .unwrap() = serde_json::Value::Number(0.into()); + + let expected_commit = json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }); + }); - assert_eq!(parsed_commit, expected_commit); + assert_eq!(parsed_commit, expected_commit); + } Ok(()) } @@ -197,21 +250,18 @@ async fn test_commit_info() -> Result<(), Box> { async fn test_empty_commit() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = 
Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - assert!(matches!( - table.new_transaction(&engine)?.commit(&engine).unwrap_err(), - KernelError::MissingCommitInfo - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + assert!(matches!( + table.new_transaction(&engine)?.commit(&engine).unwrap_err(), + KernelError::MissingCommitInfo + )); + } Ok(()) } @@ -219,53 +269,51 @@ async fn test_empty_commit() -> Result<(), Box> { async fn test_invalid_commit_info() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema, &[]).await?; - - // empty commit info test - let commit_info_schema = Arc::new(ArrowSchema::empty()); - let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); - assert!(commit_info_batch.num_rows() == 0); - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! - assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); - - // two-row commit info test - let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( - "engineInfo", - ArrowDataType::Utf8, - true, - )])); - let commit_info_batch = RecordBatch::try_new( - commit_info_schema.clone(), - vec![Arc::new(StringArray::from(vec![ - "row1: default engine", - "row2: default engine", - ]))], - )?; - - let txn = table - .new_transaction(&engine)? - .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); - - // commit! 
- assert!(matches!( - txn.commit(&engine), - Err(KernelError::InvalidCommitInfo(_)) - )); + for (table, engine, _store, _table_name) in setup_tables(schema, &[]).await? { + // empty commit info test + let commit_info_schema = Arc::new(ArrowSchema::empty()); + let commit_info_batch = RecordBatch::new_empty(commit_info_schema.clone()); + assert!(commit_info_batch.num_rows() == 0); + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + + // two-row commit info test + let commit_info_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "engineInfo", + ArrowDataType::Utf8, + true, + )])); + let commit_info_batch = RecordBatch::try_new( + commit_info_schema.clone(), + vec![Arc::new(StringArray::from(vec![ + "row1: default engine", + "row2: default engine", + ]))], + )?; + + let txn = table + .new_transaction(&engine)? + .with_commit_info(Box::new(ArrowEngineData::new(commit_info_batch))); + + // commit! + assert!(matches!( + txn.commit(&engine), + Err(KernelError::InvalidCommitInfo(_)) + )); + } Ok(()) } @@ -330,123 +378,123 @@ async fn get_and_check_all_parquet_sizes(store: Arc, path: &str async fn test_append() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let schema = Arc::new(StructType::new(vec![StructField::nullable( "number", DataType::INTEGER, )])); - let table = create_table(store.clone(), table_location, schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, store, table_name) in setup_tables(schema.clone(), &[]).await? { + let commit_info = new_commit_info()?; - let mut txn = table - .new_transaction(&engine)? 
- .with_commit_info(commit_info); + let mut txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - // commit! 
- txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) - .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
+ .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": {}, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(schema.as_ref().try_into()?), - vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]))], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": "second.parquet", + "partitionValues": {}, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, 
expected_commit); + + test_read( + &ArrowEngineData::new(RecordBatch::try_new( + Arc::new(schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6]))], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -454,8 +502,7 @@ async fn test_append() -> Result<(), Box> { async fn test_append_partitioned() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); + let partition_col = "partition"; // create a simple partitioned table: one int column named 'number', partitioned by string @@ -468,132 +515,131 @@ async fn test_append_partitioned() -> Result<(), Box> { "number", DataType::INTEGER, )])); - let table = create_table( - store.clone(), - table_location, - table_schema.clone(), - &[partition_col], - ) - .await?; - - let commit_info = new_commit_info()?; - - let mut txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); - - // create two new arrow record batches to append - let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(Int32Array::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); - let partition_vals = vec!["a", "b"]; - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data - .into_iter() - .zip(partition_vals) - .map(|(data, partition_val)| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::from([(partition_col.to_string(), partition_val.to_string())]), - true, - ) - .await - }) + for (table, engine, store, table_name) in + 
setup_tables(table_schema.clone(), &[partition_col]).await? + { + let commit_info = new_commit_info()?; + + let mut txn = table + .new_transaction(&engine)? + .with_commit_info(commit_info); + + // create two new arrow record batches to append + let append_data = [[1, 2, 3], [4, 5, 6]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(Int32Array::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) }); + let partition_vals = vec!["a", "b"]; + + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data + .into_iter() + .zip(partition_vals) + .map(|(data, partition_val)| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::from([(partition_col.to_string(), partition_val.to_string())]), + true, + ) + .await + }) + }); + + let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + for meta in write_metadata { + txn.add_write_metadata(meta?); + } - let write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - for meta in write_metadata { - txn.add_write_metadata(meta?); - } - - // commit! - txn.commit(engine.as_ref())?; - - let commit1 = store - .get(&Path::from( - "/test_table/_delta_log/00000000000000000001.json", - )) - .await?; - - let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
- .into_iter::() - .try_collect()?; - - let size = get_and_check_all_parquet_sizes(store.clone(), "/test_table/").await; - // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() - // before we clear them for comparison - check_action_timestamps(parsed_commits.iter())?; - - // set timestamps to 0 and paths to known string values for comparison - // (otherwise timestamps are non-deterministic and paths are random UUIDs) - set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; - set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; - set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; - set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; - - let expected_commit = vec![ - json!({ - "commitInfo": { - "timestamp": 0, - "operation": "UNKNOWN", - "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), - "operationParameters": {}, - "engineCommitInfo": { - "engineInfo": "default engine" + // commit! + txn.commit(engine.as_ref())?; + + let commit1 = store + .get(&Path::from(format!( + "/{table_name}/_delta_log/00000000000000000001.json" + ))) + .await?; + + let mut parsed_commits: Vec<_> = Deserializer::from_slice(&commit1.bytes().await?) 
+ .into_iter::() + .try_collect()?; + + let size = + get_and_check_all_parquet_sizes(store.clone(), format!("/{table_name}/").as_str()) + .await; + // check that the timestamps in commit_info and add actions are within 10s of SystemTime::now() + // before we clear them for comparison + check_action_timestamps(parsed_commits.iter())?; + + // set timestamps to 0 and paths to known string values for comparison + // (otherwise timestamps are non-deterministic and paths are random UUIDs) + set_value(&mut parsed_commits[0], "commitInfo.timestamp", json!(0))?; + set_value(&mut parsed_commits[1], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[1], "add.path", json!("first.parquet"))?; + set_value(&mut parsed_commits[2], "add.modificationTime", json!(0))?; + set_value(&mut parsed_commits[2], "add.path", json!("second.parquet"))?; + + let expected_commit = vec![ + json!({ + "commitInfo": { + "timestamp": 0, + "operation": "UNKNOWN", + "kernelVersion": format!("v{}", env!("CARGO_PKG_VERSION")), + "operationParameters": {}, + "engineCommitInfo": { + "engineInfo": "default engine" + } } - } - }), - json!({ - "add": { - "path": "first.parquet", - "partitionValues": { - "partition": "a" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - json!({ - "add": { - "path": "second.parquet", - "partitionValues": { - "partition": "b" - }, - "size": size, - "modificationTime": 0, - "dataChange": true - } - }), - ]; - - assert_eq!(parsed_commits, expected_commit); - - test_read( - &ArrowEngineData::new(RecordBatch::try_new( - Arc::new(table_schema.as_ref().try_into()?), - vec![ - Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), - Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), - ], - )?), - &table, - engine, - )?; + }), + json!({ + "add": { + "path": "first.parquet", + "partitionValues": { + "partition": "a" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + json!({ + "add": { + "path": 
"second.parquet", + "partitionValues": { + "partition": "b" + }, + "size": size, + "modificationTime": 0, + "dataChange": true + } + }), + ]; + + assert_eq!(parsed_commits, expected_commit); + + test_read( + &ArrowEngineData::new(RecordBatch::try_new( + Arc::new(table_schema.as_ref().try_into()?), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])), + Arc::new(StringArray::from(vec!["a", "a", "a", "b", "b", "b"])), + ], + )?), + &table, + engine, + )?; + } Ok(()) } @@ -601,9 +647,6 @@ async fn test_append_partitioned() -> Result<(), Box> { async fn test_append_invalid_schema() -> Result<(), Box> { // setup tracing let _ = tracing_subscriber::fmt::try_init(); - // setup in-memory object store and default engine - let (store, engine, table_location) = setup("test_table", true); - // create a simple table: one int column named 'number' let table_schema = Arc::new(StructType::new(vec![StructField::nullable( "number", @@ -614,49 +657,50 @@ async fn test_append_invalid_schema() -> Result<(), Box> "string", DataType::STRING, )])); - let table = create_table(store.clone(), table_location, table_schema.clone(), &[]).await?; - let commit_info = new_commit_info()?; + for (table, engine, _store, _table_name) in setup_tables(table_schema, &[]).await? { + let commit_info = new_commit_info()?; - let txn = table - .new_transaction(&engine)? - .with_commit_info(commit_info); + let txn = table + .new_transaction(&engine)? 
+ .with_commit_info(commit_info); - // create two new arrow record batches to append - let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { - let data = RecordBatch::try_new( - Arc::new(data_schema.as_ref().try_into()?), - vec![Arc::new(StringArray::from(data.to_vec()))], - )?; - Ok(Box::new(ArrowEngineData::new(data))) - }); + // create two new arrow record batches to append + let append_data = [["a", "b"], ["c", "d"]].map(|data| -> DeltaResult<_> { + let data = RecordBatch::try_new( + Arc::new(data_schema.as_ref().try_into()?), + vec![Arc::new(StringArray::from(data.to_vec()))], + )?; + Ok(Box::new(ArrowEngineData::new(data))) + }); - // write data out by spawning async tasks to simulate executors - let engine = Arc::new(engine); - let write_context = Arc::new(txn.get_write_context()); - let tasks = append_data.into_iter().map(|data| { - // arc clones - let engine = engine.clone(); - let write_context = write_context.clone(); - tokio::task::spawn(async move { - engine - .write_parquet( - data.as_ref().unwrap(), - write_context.as_ref(), - HashMap::new(), - true, - ) - .await - }) - }); + // write data out by spawning async tasks to simulate executors + let engine = Arc::new(engine); + let write_context = Arc::new(txn.get_write_context()); + let tasks = append_data.into_iter().map(|data| { + // arc clones + let engine = engine.clone(); + let write_context = write_context.clone(); + tokio::task::spawn(async move { + engine + .write_parquet( + data.as_ref().unwrap(), + write_context.as_ref(), + HashMap::new(), + true, + ) + .await + }) + }); - let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); - assert!(write_metadata.all(|res| match res { - Err(KernelError::Arrow(ArrowError::SchemaError(_))) => true, - Err(KernelError::Backtraced { source, .. 
}) - if matches!(&*source, KernelError::Arrow(ArrowError::SchemaError(_))) => - true, - _ => false, - })); + let mut write_metadata = futures::future::join_all(tasks).await.into_iter().flatten(); + assert!(write_metadata.all(|res| match res { + Err(KernelError::Arrow(ArrowError::SchemaError(_))) => true, + Err(KernelError::Backtraced { source, .. }) + if matches!(&*source, KernelError::Arrow(ArrowError::SchemaError(_))) => + true, + _ => false, + })); + } Ok(()) } From 2e4bdfa7c2d520b7d58e553f8976a7591548891d Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Thu, 20 Feb 2025 16:51:01 -0800 Subject: [PATCH 12/38] Part 4: read_table.c uses transform in ffi (#614) Use new transform functionality to transform data over FFI. This lets us get rid of all the gross partition adding code in c :) In particular: - remove `add_partition_columns` in `arrow.c`, we don't need it anymore - expose ffi methods to get an expression evaluator and evaluate an expression from `c` - use the above to add an `apply_transform` function in `arrow.c` ## How was this change tested? 
- existing tests --- ffi/examples/read-table/arrow.c | 126 ++++++------------ ffi/examples/read-table/arrow.h | 4 +- ffi/examples/read-table/read_table.c | 8 +- ffi/examples/read-table/read_table.h | 1 + ffi/src/engine_funcs.rs | 115 +++++++++++++++- ffi/src/expressions/kernel.rs | 25 +++- ffi/src/lib.rs | 4 +- ffi/src/scan.rs | 65 ++++++++- .../expected-data/basic-partitioned.expected | 16 +-- kernel/src/engine/arrow_expression.rs | 5 + 10 files changed, 263 insertions(+), 106 deletions(-) diff --git a/ffi/examples/read-table/arrow.c b/ffi/examples/read-table/arrow.c index c6214df6b0..1068369975 100644 --- a/ffi/examples/read-table/arrow.c +++ b/ffi/examples/read-table/arrow.c @@ -11,6 +11,7 @@ ArrowContext* init_arrow_context() context->num_batches = 0; context->batches = NULL; context->cur_filter = NULL; + context->cur_transform = NULL; return context; } @@ -50,86 +51,10 @@ static GArrowRecordBatch* get_record_batch(FFI_ArrowArray* array, GArrowSchema* return record_batch; } -// Add columns to a record batch for each partition. In a "real" engine we would want to parse the -// string values into the correct data type. This program just adds all partition columns as strings -// for simplicity -static GArrowRecordBatch* add_partition_columns( - GArrowRecordBatch* record_batch, - PartitionList* partition_cols, - const CStringMap* partition_values) -{ - gint64 rows = garrow_record_batch_get_n_rows(record_batch); - gint64 cols = garrow_record_batch_get_n_columns(record_batch); - GArrowRecordBatch* cur_record_batch = record_batch; - GError* error = NULL; - for (uintptr_t i = 0; i < partition_cols->len; i++) { - char* col = partition_cols->cols[i]; - guint pos = cols + i; - KernelStringSlice key = { col, strlen(col) }; - char* partition_val = get_from_string_map(partition_values, key, allocate_string); - print_diag( - " Adding partition column '%s' with value '%s' at column %u\n", - col, - partition_val ? 
partition_val : "NULL", - pos); - GArrowStringArrayBuilder* builder = garrow_string_array_builder_new(); - for (gint64 i = 0; i < rows; i++) { - if (partition_val) { - garrow_string_array_builder_append_string(builder, partition_val, &error); - } else { - garrow_array_builder_append_null((GArrowArrayBuilder*)builder, &error); - } - if (report_g_error("Can't append to partition column builder", error)) { - break; - } - } - - if (partition_val) { - free(partition_val); - } - - if (error != NULL) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - - GArrowArray* partition_col = garrow_array_builder_finish((GArrowArrayBuilder*)builder, &error); - if (report_g_error("Can't build string array for partition column", error)) { - printf("Giving up on column %s\n", col); - g_error_free(error); - g_object_unref(builder); - error = NULL; - continue; - } - g_object_unref(builder); - - GArrowDataType* string_data_type = (GArrowDataType*)garrow_string_data_type_new(); - GArrowField* field = garrow_field_new(col, string_data_type); - GArrowRecordBatch* old_batch = cur_record_batch; - cur_record_batch = garrow_record_batch_add_column(old_batch, pos, field, partition_col, &error); - g_object_unref(old_batch); - g_object_unref(partition_col); - g_object_unref(string_data_type); - g_object_unref(field); - if (cur_record_batch == NULL) { - if (error != NULL) { - printf("Could not add column at %u: %s\n", pos, error->message); - g_error_free(error); - } - } - } - return cur_record_batch; -} - // append a batch to our context static void add_batch_to_context( ArrowContext* context, - ArrowFFIData* arrow_data, - PartitionList* partition_cols, - const CStringMap* partition_values) + ArrowFFIData* arrow_data) { GArrowSchema* schema = get_schema(&arrow_data->schema); GArrowRecordBatch* record_batch = get_record_batch(&arrow_data->array, schema); @@ -142,11 +67,6 @@ static void add_batch_to_context( 
g_object_unref(context->cur_filter); context->cur_filter = NULL; } - record_batch = add_partition_columns(record_batch, partition_cols, partition_values); - if (record_batch == NULL) { - printf("Failed to add partition columns, not adding batch\n"); - return; - } context->batches = g_list_append(context->batches, record_batch); context->num_batches++; print_diag( @@ -187,20 +107,52 @@ static GArrowBooleanArray* slice_to_arrow_bool_array(const KernelBoolSlice slice return (GArrowBooleanArray*)ret; } +// This will apply the transform in the context to the specified data. This consumes the passed +// ExclusiveEngineData and return a new transformed one +static ExclusiveEngineData* apply_transform( + struct EngineContext* context, + ExclusiveEngineData* data) { + if (!context->arrow_context->cur_transform) { + print_diag(" No transform needed"); + return data; + } + print_diag(" Applying transform\n"); + SharedExpressionEvaluator* evaluator = get_evaluator( + context->engine, + context->read_schema, // input schema + context->arrow_context->cur_transform, + context->logical_schema); // output schema + ExternResultHandleExclusiveEngineData transformed_res = evaluate( + context->engine, + &data, + evaluator); + free_engine_data(data); + free_evaluator(evaluator); + if (transformed_res.tag != OkHandleExclusiveEngineData) { + print_error("Failed to transform read data.", (Error*)transformed_res.err); + free_error((Error*)transformed_res.err); + return NULL; + } + return transformed_res.ok; +} + // This is the callback that will be called for each chunk of data read from the parquet file static void visit_read_data(void* vcontext, ExclusiveEngineData* data) { print_diag(" Converting read data to arrow\n"); struct EngineContext* context = vcontext; - ExternResultArrowFFIData arrow_res = get_raw_arrow_data(data, context->engine); + ExclusiveEngineData* transformed = apply_transform(context, data); + if (!transformed) { + exit(-1); + } + ExternResultArrowFFIData arrow_res = 
get_raw_arrow_data(transformed, context->engine); if (arrow_res.tag != OkArrowFFIData) { print_error("Failed to get arrow data.", (Error*)arrow_res.err); free_error((Error*)arrow_res.err); exit(-1); } ArrowFFIData* arrow_data = arrow_res.ok; - add_batch_to_context( - context->arrow_context, arrow_data, context->partition_cols, context->partition_values); + add_batch_to_context(context->arrow_context, arrow_data); free(arrow_data); // just frees the struct, the data and schema are freed/owned by add_batch_to_context } @@ -208,7 +160,8 @@ static void visit_read_data(void* vcontext, ExclusiveEngineData* data) void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector) + const KernelBoolSlice selection_vector, + const Expression* transform) { int full_len = strlen(context->table_root) + path.len + 1; char* full_path = malloc(sizeof(char) * full_len); @@ -233,6 +186,7 @@ void c_read_parquet_file( } context->arrow_context->cur_filter = sel_array; } + context->arrow_context->cur_transform = transform; ExclusiveFileReadResultIterator* read_iter = read_res.ok; for (;;) { ExternResultbool ok_res = read_result_next(read_iter, context, visit_read_data); diff --git a/ffi/examples/read-table/arrow.h b/ffi/examples/read-table/arrow.h index 0236b238b9..8f34cdd4fd 100644 --- a/ffi/examples/read-table/arrow.h +++ b/ffi/examples/read-table/arrow.h @@ -15,13 +15,15 @@ typedef struct ArrowContext gsize num_batches; GList* batches; GArrowBooleanArray* cur_filter; + const Expression* cur_transform; } ArrowContext; ArrowContext* init_arrow_context(void); void c_read_parquet_file( struct EngineContext* context, const KernelStringSlice path, - const KernelBoolSlice selection_vector); + const KernelBoolSlice selection_vector, + const Expression* transform); void print_arrow_context(ArrowContext* context); void free_arrow_context(ArrowContext* context); diff --git a/ffi/examples/read-table/read_table.c 
b/ffi/examples/read-table/read_table.c index 704559a59b..0ddc20ded4 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -50,6 +50,7 @@ void scan_row_callback( int64_t size, const Stats* stats, const DvInfo* dv_info, + const Expression* transform, const CStringMap* partition_values) { (void)size; // not using this at the moment @@ -76,7 +77,7 @@ void scan_row_callback( context->partition_values = partition_values; print_partition_info(context, partition_values); #ifdef PRINT_ARROW_DATA - c_read_parquet_file(context, path, selection_vector); + c_read_parquet_file(context, path, selection_vector, transform); #endif free_bool_slice(selection_vector); context->partition_values = NULL; @@ -273,10 +274,12 @@ int main(int argc, char* argv[]) SharedScan* scan = scan_res.ok; SharedGlobalScanState* global_state = get_global_scan_state(scan); + SharedSchema* logical_schema = get_global_logical_schema(global_state); SharedSchema* read_schema = get_global_read_schema(global_state); PartitionList* partition_cols = get_partition_list(global_state); struct EngineContext context = { global_state, + logical_schema, read_schema, table_root, engine, @@ -321,7 +324,8 @@ int main(int argc, char* argv[]) free_kernel_scan_data(data_iter); free_scan(scan); - free_global_read_schema(read_schema); + free_schema(logical_schema); + free_schema(read_schema); free_global_scan_state(global_state); free_snapshot(snapshot); free_engine(engine); diff --git a/ffi/examples/read-table/read_table.h b/ffi/examples/read-table/read_table.h index 28d9c72dc3..cf55863d9e 100644 --- a/ffi/examples/read-table/read_table.h +++ b/ffi/examples/read-table/read_table.h @@ -14,6 +14,7 @@ typedef struct PartitionList struct EngineContext { SharedGlobalScanState* global_state; + SharedSchema* logical_schema; SharedSchema* read_schema; char* table_root; SharedExternEngine* engine; diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs index 1afb605106..7c12bcf51f 100644 
--- a/ffi/src/engine_funcs.rs +++ b/ffi/src/engine_funcs.rs @@ -2,7 +2,10 @@ use std::sync::Arc; -use delta_kernel::{schema::Schema, DeltaResult, FileDataReadResultIterator}; +use delta_kernel::{ + schema::{DataType, Schema, SchemaRef}, + DeltaResult, EngineData, Expression, ExpressionEvaluator, FileDataReadResultIterator, +}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; @@ -97,7 +100,7 @@ pub unsafe extern "C" fn free_read_result_iter(data: Handle, + engine: Handle, // TODO Does this cause a free? file: &FileMeta, physical_schema: Handle, ) -> ExternResult> { @@ -130,3 +133,111 @@ fn read_parquet_file_impl( }); Ok(res.into()) } + +// Expression Eval + +#[handle_descriptor(target=dyn ExpressionEvaluator, mutable=false)] +pub struct SharedExpressionEvaluator; + +/// Get the evaluator as provided by the passed engines `ExpressionHandler`. +/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `Expression`, and `SharedSchema`s +#[no_mangle] +pub unsafe extern "C" fn get_evaluator( + engine: Handle, + input_schema: Handle, + expression: &Expression, + // TODO: Make this a data_type, and give a way for c code to go between schema <-> datatype + output_type: Handle, +) -> Handle { + let engine = unsafe { engine.clone_as_arc() }; + let input_schema = unsafe { input_schema.clone_as_arc() }; + let output_type: DataType = output_type.as_ref().clone().into(); + get_evaluator_impl(engine, input_schema, expression, output_type) +} + +fn get_evaluator_impl( + extern_engine: Arc, + input_schema: SchemaRef, + expression: &Expression, + output_type: DataType, +) -> Handle { + let engine = extern_engine.engine(); + let evaluator = engine.get_expression_handler().get_evaluator( + input_schema, + expression.clone(), + output_type, + ); + evaluator.into() +} + +/// Free an evaluator +/// # Safety +/// +/// Caller is responsible for passing a valid handle. 
+#[no_mangle] +pub unsafe extern "C" fn free_evaluator(evaluator: Handle) { + debug!("engine released evaluator"); + evaluator.drop_handle(); +} + +/// Use the passed `evaluator` to evaluate its expression against the passed `batch` data. +/// +/// # Safety +/// Caller is responsible for calling with a valid `Engine`, `ExclusiveEngineData`, and `Evaluator` +#[no_mangle] +pub unsafe extern "C" fn evaluate( + engine: Handle, + batch: &mut Handle, + evaluator: Handle, +) -> ExternResult> { + let engine = unsafe { engine.clone_as_arc() }; + let batch = unsafe { batch.as_mut() }; + let evaluator = unsafe { evaluator.clone_as_arc() }; + let res = evaluate_impl(batch, evaluator.as_ref()); + res.into_extern_result(&engine.as_ref()) +} + +fn evaluate_impl( + batch: &dyn EngineData, + evaluator: &dyn ExpressionEvaluator, +) -> DeltaResult> { + evaluator.evaluate(batch).map(Into::into) +} + +#[cfg(test)] +mod tests { + use super::{free_evaluator, get_evaluator}; + use crate::{free_engine, handle::Handle, scan::SharedSchema, tests::get_default_engine}; + use delta_kernel::{ + schema::{DataType, StructField, StructType}, + Expression, + }; + use std::sync::Arc; + + #[test] + fn test_get_evaluator() { + let engine = get_default_engine(); + let in_schema = Arc::new(StructType::new(vec![StructField::new( + "a", + DataType::LONG, + true, + )])); + let expr = Expression::literal(1); + let output_type: Handle = in_schema.clone().into(); + let in_schema_handle: Handle = in_schema.into(); + unsafe { + let evaluator = get_evaluator( + engine.shallow_copy(), + in_schema_handle.shallow_copy(), + &expr, + output_type.shallow_copy(), + ); + in_schema_handle.drop_handle(); + output_type.drop_handle(); + free_engine(engine); + free_evaluator(evaluator); + } + } +} diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index a5116db47c..a2a1dcd1f6 100644 --- a/ffi/src/expressions/kernel.rs +++ b/ffi/src/expressions/kernel.rs @@ -189,6 +189,29 @@ pub struct 
EngineExpressionVisitor { pub unsafe extern "C" fn visit_expression( expression: &Handle, visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression.as_ref(), visitor) +} + +/// Visit the expression of the passed [`Expression`] pointer using the provided `visitor`. See the +/// documentation of [`EngineExpressionVisitor`] for a description of how this visitor works. +/// +/// This method returns the id that the engine generated for the top level expression +/// +/// # Safety +/// +/// The caller must pass a valid Expression pointer and expression visitor +#[no_mangle] +pub unsafe extern "C" fn visit_expression_ref( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, +) -> usize { + visit_expression_internal(expression, visitor) +} + +pub fn visit_expression_internal( + expression: &Expression, + visitor: &mut EngineExpressionVisitor, ) -> usize { macro_rules! call { ( $visitor:ident, $visitor_fn:ident $(, $extra_args:expr) *) => { @@ -367,6 +390,6 @@ pub unsafe extern "C" fn visit_expression( } } let top_level = call!(visitor, make_field_list, 1); - visit_expression_impl(visitor, expression.as_ref(), top_level); + visit_expression_impl(visitor, expression, top_level); top_level } diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 0675d86bae..af35cf198b 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -330,7 +330,7 @@ pub unsafe extern "C" fn free_row_indexes(slice: KernelRowIndexArray) { /// an opaque struct that encapsulates data read by an engine. this handle can be passed back into /// some kernel calls to operate on the data, or can be converted into the raw data as read by the /// [`delta_kernel::Engine`] by calling [`get_raw_engine_data`] -#[handle_descriptor(target=dyn EngineData, mutable=true, sized=false)] +#[handle_descriptor(target=dyn EngineData, mutable=true)] pub struct ExclusiveEngineData; /// Drop an `ExclusiveEngineData`. 
@@ -768,7 +768,7 @@ mod tests { } } - fn get_default_engine() -> Handle { + pub(crate) fn get_default_engine() -> Handle { let path = "memory:///doesntmatter/foo"; let path = kernel_string_slice!(path); let builder = unsafe { ok_or_panic(get_engine_builder(path, allocate_err)) }; diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 73f6910105..a457b7b9d3 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -7,7 +7,7 @@ use delta_kernel::scan::state::{visit_scan_files, DvInfo, GlobalScanState}; use delta_kernel::scan::{Scan, ScanData}; use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot; -use delta_kernel::{DeltaResult, Error, ExpressionRef}; +use delta_kernel::{DeltaResult, Error, Expression, ExpressionRef}; use delta_kernel_ffi_macros::handle_descriptor; use tracing::debug; use url::Url; @@ -15,6 +15,7 @@ use url::Url; use crate::expressions::engine::{ unwrap_kernel_expression, EnginePredicate, KernelExpressionVisitorState, }; +use crate::expressions::SharedExpression; use crate::{ kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, @@ -99,12 +100,25 @@ pub unsafe extern "C" fn get_global_read_schema( state.physical_schema.clone().into() } -/// Free a global read schema +/// Get the kernel view of the physical read schema that an engine should read from parquet file in +/// a scan +/// +/// # Safety +/// Engine is responsible for providing a valid GlobalScanState pointer +#[no_mangle] +pub unsafe extern "C" fn get_global_logical_schema( + state: Handle, +) -> Handle { + let state = unsafe { state.as_ref() }; + state.logical_schema.clone().into() +} + +/// Free a schema /// /// # Safety /// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`] #[no_mangle] -pub unsafe extern "C" fn free_global_read_schema(schema: Handle) { +pub unsafe extern "C" fn free_schema(schema: Handle) { 
schema.drop_handle(); } @@ -263,12 +277,23 @@ pub struct Stats { pub num_records: u64, } +/// This callback will be invoked for each valid file that needs to be read for a scan. +/// +/// The arguments to the callback are: +/// * `context`: a `void*` context this can be anything that engine needs to pass through to each call +/// * `path`: a `KernelStringSlice` which is the path to the file +/// * `size`: an `i64` which is the size of the file +/// * `dv_info`: a [`DvInfo`] struct, which allows getting the selection vector for this file +/// * `transform`: An optional expression that, if not `NULL`, _must_ be applied to physical data to +/// convert it to the correct logical format. If this is `NULL`, no transform is needed. +/// * `partition_values`: [DEPRECATED] a `HashMap` which are partition values type CScanCallback = extern "C" fn( engine_context: NullableCvoid, path: KernelStringSlice, size: i64, stats: Option<&Stats>, dv_info: &DvInfo, + transform: Option<&Expression>, partition_map: &CStringMap, ); @@ -303,10 +328,40 @@ pub unsafe extern "C" fn get_from_string_map( .and_then(|v| allocate_fn(kernel_string_slice!(v))) } +/// Transformation expressions that need to be applied to each row `i` in ScanData. You can use +/// [`get_transform_for_row`] to get the transform for a particular row. If that returns an +/// associated expression, it _must_ be applied to the data read from the file specified by the +/// row. The resultant schema for this expression is guaranteed to be `Scan.schema()`. If +/// `get_transform_for_row` returns `NULL` no expression need be applied and the data read from disk +/// is already in the correct logical state. +/// +/// NB: If you are using `visit_scan_data` you don't need to worry about dealing with probing +/// `CTransforms`. The callback will be invoked with the correct transform for you. pub struct CTransforms { transforms: Vec>, } +#[no_mangle] +/// Allow getting the transform for a particular row. 
If the requested row is outside the range of +/// the passed `CTransforms` returns `NULL`, otherwise returns the element at the index of the +/// specified row. See also [`CTransforms`] above. +/// +/// # Safety +/// +/// The engine is responsible for providing a valid [`CTransforms`] pointer, and for checking if the +/// return value is `NULL` or not. +pub unsafe extern "C" fn get_transform_for_row( + row: usize, + transforms: &CTransforms, +) -> Option> { + transforms + .transforms + .get(row) + .cloned() + .flatten() + .map(Into::into) +} + /// Get a selection vector out of a [`DvInfo`] struct /// /// # Safety @@ -369,9 +424,10 @@ fn rust_callback( size: i64, kernel_stats: Option, dv_info: DvInfo, - _transform: Option, + transform: Option, partition_values: HashMap, ) { + let transform = transform.map(|e| e.as_ref().clone()); let partition_map = CStringMap { values: partition_values, }; @@ -384,6 +440,7 @@ fn rust_callback( size, stats.as_ref(), &dv_info, + transform.as_ref(), &partition_map, ); } diff --git a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected index 4a062b1045..324ef00862 100644 --- a/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected +++ b/ffi/tests/read-table-testing/expected-data/basic-partitioned.expected @@ -6,6 +6,14 @@ Schema: ├─ number: long └─ a_float: double +letter: [ + "a", + "e", + "f", + "a", + "b", + "c" +] number: [ 4, 5, @@ -22,11 +30,3 @@ a_float: [ 2.2, 3.3 ] -letter: [ - "a", - "e", - "f", - "a", - "b", - "c" -] diff --git a/kernel/src/engine/arrow_expression.rs b/kernel/src/engine/arrow_expression.rs index b7a845171e..f830d72492 100644 --- a/kernel/src/engine/arrow_expression.rs +++ b/kernel/src/engine/arrow_expression.rs @@ -22,6 +22,7 @@ use crate::arrow::datatypes::{ }; use crate::arrow::error::ArrowError; use itertools::Itertools; +use tracing::debug; use super::arrow_conversion::LIST_ARRAY_ROOT; use 
super::arrow_utils::make_arrow_error; @@ -538,6 +539,10 @@ pub struct DefaultExpressionEvaluator { impl ExpressionEvaluator for DefaultExpressionEvaluator { fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult> { + debug!( + "Arrow evaluator evaluating: {:#?}", + self.expression.as_ref() + ); let batch = batch .any_ref() .downcast_ref::() From 301094ff6b813ab9e2e7665c4e7b3d24687e20ac Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Fri, 21 Feb 2025 11:03:22 -0800 Subject: [PATCH 13/38] feat!(ffi): remove `visit_snapshot_schema`, add `logical_schema` (#709) ## What changes are proposed in this pull request? This PR removes the old `visit_snapshot_schema` introduced in #683 - we should just go ahead and do the 'right thing' with having a `visit_schema` (introduced in the other PR) and a `logical_schema()` function (added here) in order to facilitate visiting the snapshot schema. Additionally I've moved the schema-related things up from `scan` module to top-level in ffi crate. Exact changes listed below; this PR updates tests/examples to leverage the new changes. ### This PR affects the following public APIs 1. Remove `visit_snapshot_schema()` API 2. Add a new `logical_schema(snapshot)` API so you can get the schema of a snapshot and use the `visit_schema` directly 3. Renames `free_global_read_schema` to just `free_schema` 4. Moves `SharedSchema` and `free_schema` up from `mod scan` into top-level `ffi` crate. ## How was this change tested? 
existing UT --- ffi/examples/read-table/schema.h | 4 +++- ffi/src/engine_funcs.rs | 8 ++++---- ffi/src/lib.rs | 24 ++++++++++++++++++++++++ ffi/src/scan.rs | 15 ++------------- ffi/src/schema.rs | 22 +++------------------- kernel/src/snapshot.rs | 1 + 6 files changed, 37 insertions(+), 37 deletions(-) diff --git a/ffi/examples/read-table/schema.h b/ffi/examples/read-table/schema.h index 1a8960b2c5..a70bd5f5a0 100644 --- a/ffi/examples/read-table/schema.h +++ b/ffi/examples/read-table/schema.h @@ -273,7 +273,8 @@ void print_schema(SharedSnapshot* snapshot) .visit_timestamp = visit_timestamp, .visit_timestamp_ntz = visit_timestamp_ntz, }; - uintptr_t schema_list_id = visit_snapshot_schema(snapshot, &visitor); + SharedSchema* schema = logical_schema(snapshot); + uintptr_t schema_list_id = visit_schema(schema, &visitor); #ifdef VERBOSE printf("Schema returned in list %" PRIxPTR "\n", schema_list_id); #endif @@ -281,5 +282,6 @@ void print_schema(SharedSnapshot* snapshot) printf("Schema:\n"); print_list(&builder, schema_list_id, 0, 0); printf("\n"); + free_schema(schema); free_builder(builder); } diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs index 7c12bcf51f..9cba0006c7 100644 --- a/ffi/src/engine_funcs.rs +++ b/ffi/src/engine_funcs.rs @@ -2,8 +2,8 @@ use std::sync::Arc; +use delta_kernel::schema::{DataType, Schema, SchemaRef}; use delta_kernel::{ - schema::{DataType, Schema, SchemaRef}, DeltaResult, EngineData, Expression, ExpressionEvaluator, FileDataReadResultIterator, }; use delta_kernel_ffi_macros::handle_descriptor; @@ -11,8 +11,8 @@ use tracing::debug; use url::Url; use crate::{ - scan::SharedSchema, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, - KernelStringSlice, NullableCvoid, SharedExternEngine, TryFromStringSlice, + ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelStringSlice, + NullableCvoid, SharedExternEngine, SharedSchema, TryFromStringSlice, }; use super::handle::Handle; @@ -209,7 +209,7 @@ 
fn evaluate_impl( #[cfg(test)] mod tests { use super::{free_evaluator, get_evaluator}; - use crate::{free_engine, handle::Handle, scan::SharedSchema, tests::get_default_engine}; + use crate::{free_engine, handle::Handle, tests::get_default_engine, SharedSchema}; use delta_kernel::{ schema::{DataType, StructField, StructType}, Expression, diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index af35cf198b..6c21e01574 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -11,6 +11,7 @@ use std::sync::Arc; use tracing::debug; use url::Url; +use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Engine, EngineData, Table}; use delta_kernel_ffi_macros::handle_descriptor; @@ -561,6 +562,9 @@ pub unsafe extern "C" fn free_engine(engine: Handle) { engine.drop_handle(); } +#[handle_descriptor(target=Schema, mutable=false, sized=true)] +pub struct SharedSchema; + #[handle_descriptor(target=Snapshot, mutable=false, sized=true)] pub struct SharedSnapshot; @@ -607,6 +611,26 @@ pub unsafe extern "C" fn version(snapshot: Handle) -> u64 { snapshot.version() } +/// Get the logical schema of the specified snapshot +/// +/// # Safety +/// +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn logical_schema(snapshot: Handle) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + Arc::new(snapshot.schema().clone()).into() +} + +/// Free a schema +/// +/// # Safety +/// Engine is responsible for providing a valid schema handle. +#[no_mangle] +pub unsafe extern "C" fn free_schema(schema: Handle) { + schema.drop_handle(); +} + /// Get the resolved root of the table. 
This should be used in any future calls that require /// constructing a path /// diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index a457b7b9d3..693c4b3979 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -5,7 +5,6 @@ use std::sync::{Arc, Mutex}; use delta_kernel::scan::state::{visit_scan_files, DvInfo, GlobalScanState}; use delta_kernel::scan::{Scan, ScanData}; -use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot; use delta_kernel::{DeltaResult, Error, Expression, ExpressionRef}; use delta_kernel_ffi_macros::handle_descriptor; @@ -19,7 +18,8 @@ use crate::expressions::SharedExpression; use crate::{ kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, - SharedExternEngine, SharedSnapshot, StringIter, StringSliceIterator, TryFromStringSlice, + SharedExternEngine, SharedSchema, SharedSnapshot, StringIter, StringSliceIterator, + TryFromStringSlice, }; use super::handle::Handle; @@ -71,8 +71,6 @@ fn scan_impl( #[handle_descriptor(target=GlobalScanState, mutable=false, sized=true)] pub struct SharedGlobalScanState; -#[handle_descriptor(target=Schema, mutable=false, sized=true)] -pub struct SharedSchema; /// Get the global state for a scan. See the docs for [`delta_kernel::scan::state::GlobalScanState`] /// for more information. 
@@ -113,15 +111,6 @@ pub unsafe extern "C" fn get_global_logical_schema( state.logical_schema.clone().into() } -/// Free a schema -/// -/// # Safety -/// Engine is responsible for providing a valid schema obtained via [`get_global_read_schema`] -#[no_mangle] -pub unsafe extern "C" fn free_schema(schema: Handle) { - schema.drop_handle(); -} - /// Get a count of the number of partition columns for this scan /// /// # Safety diff --git a/ffi/src/schema.rs b/ffi/src/schema.rs index f033ac8d99..a474c80c3a 100644 --- a/ffi/src/schema.rs +++ b/ffi/src/schema.rs @@ -1,7 +1,8 @@ use std::os::raw::c_void; -use crate::scan::{CStringMap, SharedSchema}; -use crate::{handle::Handle, kernel_string_slice, KernelStringSlice, SharedSnapshot}; +use crate::handle::Handle; +use crate::scan::CStringMap; +use crate::{kernel_string_slice, KernelStringSlice, SharedSchema}; use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructType}; /// The `EngineSchemaVisitor` defines a visitor system to allow engines to build their own @@ -192,23 +193,6 @@ pub struct EngineSchemaVisitor { ), } -/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the -/// documentation of [`EngineSchemaVisitor`] for a description of how this visitor works. -/// -/// This method returns the id of the list allocated to hold the top level schema columns. -/// -/// # Safety -/// -/// Caller is responsible for passing a valid snapshot handle and schema visitor. -#[no_mangle] -pub unsafe extern "C" fn visit_snapshot_schema( - snapshot: Handle, - visitor: &mut EngineSchemaVisitor, -) -> usize { - let snapshot = unsafe { snapshot.as_ref() }; - visit_schema_impl(snapshot.schema(), visitor) -} - /// Visit the given `schema` using the provided `visitor`. See the documentation of /// [`EngineSchemaVisitor`] for a description of how this visitor works. 
/// diff --git a/kernel/src/snapshot.rs b/kernel/src/snapshot.rs index e3b1bf9f0c..816511e7a8 100644 --- a/kernel/src/snapshot.rs +++ b/kernel/src/snapshot.rs @@ -98,6 +98,7 @@ impl Snapshot { } /// Table [`Schema`] at this `Snapshot`s version. + // TODO should this return SchemaRef? pub fn schema(&self) -> &Schema { self.table_configuration.schema() } From 0a77e572fbef12ec2f3c27267bf03bfb16037c44 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 24 Feb 2025 10:33:49 -0800 Subject: [PATCH 14/38] Support --all-features again (#708) --- .github/workflows/build.yml | 8 ++--- .github/workflows/default-kernel-features | 1 - README.md | 35 ++++++-------------- integration-tests/test-all-arrow-versions.sh | 22 ++++++++++-- kernel/Cargo.toml | 7 ++-- kernel/src/arrow.rs | 14 +++++--- kernel/src/parquet.rs | 14 +++++--- 7 files changed, 60 insertions(+), 41 deletions(-) delete mode 100644 .github/workflows/default-kernel-features diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 14e6423b8f..a8a24dd074 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -37,7 +37,7 @@ jobs: cargo install cargo-msrv --locked - name: verify-msrv run: | - cargo msrv --path kernel/ verify --features $(cat .github/workflows/default-kernel-features) + cargo msrv --path kernel/ verify --all-features cargo msrv --path derive-macros/ verify --all-features cargo msrv --path ffi/ verify --all-features cargo msrv --path ffi-proc-macros/ verify --all-features @@ -104,7 +104,7 @@ jobs: - name: check kernel builds with no-default-features run: cargo build -p delta_kernel --no-default-features - name: build and lint with clippy - run: cargo clippy --benches --tests --features $(cat .github/workflows/default-kernel-features) -- -D warnings + run: cargo clippy --benches --tests --all-features -- -D warnings - name: lint without default features run: cargo clippy --no-default-features -- -D warnings - name: check kernel builds with default-engine @@ 
-129,7 +129,7 @@ jobs: override: true - uses: Swatinem/rust-cache@v2 - name: test - run: cargo test --workspace --verbose --features $(cat .github/workflows/default-kernel-features) -- --skip read_table_version_hdfs + run: cargo test --workspace --verbose --all-features -- --skip read_table_version_hdfs ffi_test: runs-on: ${{ matrix.os }} @@ -229,7 +229,7 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - uses: Swatinem/rust-cache@v2 - name: Generate code coverage - run: cargo llvm-cov --features $(cat .github/workflows/default-kernel-features) --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs + run: cargo llvm-cov --all-features --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 with: diff --git a/.github/workflows/default-kernel-features b/.github/workflows/default-kernel-features deleted file mode 100644 index bee74feefa..0000000000 --- a/.github/workflows/default-kernel-features +++ /dev/null @@ -1 +0,0 @@ -integration-test,default-engine,default-engine-rustls,cloud,arrow,sync-engine diff --git a/README.md b/README.md index 6e25a2ddb5..23eff87700 100644 --- a/README.md +++ b/README.md @@ -74,32 +74,19 @@ quickly. To enable engines that already integrate arrow to also integrate kernel to track a specific version of arrow that kernel depends on, we take as broad dependency on arrow versions as we can. -This means you can force kernel to rely on the specific arrow version that your engine already uses, -as long as it falls in that range. You can see the range in the `Cargo.toml` in the same folder as -this `README.md`. +We allow selecting the version of arrow to use via feature flags. 
Currently we support the following +flags: -For example, although arrow 53.1.0 has been released, you can force kernel to compile on 53.0 by -putting the following in your project's `Cargo.toml`: +- `arrow_53`: Use arrow version 53 +- `arrow_54`: Use arrow version 54 -```toml -[patch.crates-io] -arrow = "53.0" -arrow-arith = "53.0" -arrow-array = "53.0" -arrow-buffer = "53.0" -arrow-cast = "53.0" -arrow-data = "53.0" -arrow-ord = "53.0" -arrow-json = "53.0" -arrow-select = "53.0" -arrow-schema = "53.0" -parquet = "53.0" -``` +Note that if more than one `arrow_x` feature is enabled, kernel will default to the _lowest_ +specified flag. This also means that if you use `--all-features` you will get the lowest version of +arrow that kernel supports. -Note that unfortunately patching in `cargo` requires that _exactly one_ version matches your -specification. If only arrow "53.0.0" had been released the above will work, but if "53.0.1" where -to be released, the specification will break and you will need to provide a more restrictive -specification like `"=53.0.0"`. +If no arrow feature is enabled, but are least one of `default-engine`, `sync-engine`, +`arrow-conversion` or, `arrow-expression` is enabled, the lowest supported arrow version will be +enabled. ### Object Store You may also need to patch the `object_store` version used if the version of `parquet` you depend on @@ -186,4 +173,4 @@ Some design principles which should be considered: [cargo-llvm-cov]: https://github.com/taiki-e/cargo-llvm-cov [FFI]: ffi/ [Arrow]: https://arrow.apache.org/rust/arrow/index.html -[Tokio]: https://tokio.rs/ \ No newline at end of file +[Tokio]: https://tokio.rs/ diff --git a/integration-tests/test-all-arrow-versions.sh b/integration-tests/test-all-arrow-versions.sh index 13fa42618f..e4207a56ec 100755 --- a/integration-tests/test-all-arrow-versions.sh +++ b/integration-tests/test-all-arrow-versions.sh @@ -2,14 +2,31 @@ set -eu -o pipefail +clean_up () { + CODE=$? 
+ git checkout HEAD Cargo.toml + exit $CODE +} + +# ensure we checkout the clean version of Cargo.toml no matter how we exit +trap clean_up EXIT + test_arrow_version() { ARROW_VERSION="$1" echo "== Testing version $ARROW_VERSION ==" cargo clean rm -f Cargo.lock cargo update + echo "Cargo.toml is:" cat Cargo.toml - cargo run --features ${ARROW_VERSION} + echo "" + if [ "$ARROW_VERSION" = "ALL_ENABLED" ]; then + echo "testing with --all-features" + cargo run --all-features + else + echo "testing with --features ${ARROW_VERSION}" + cargo run --features ${ARROW_VERSION} + fi } FEATURES=$(cat ../kernel/Cargo.toml | grep -e ^arrow_ | awk '{ print $1 }' | sort -u) @@ -23,4 +40,5 @@ do test_arrow_version $ARROW_VERSION done -git checkout Cargo.toml +test_arrow_version "ALL_ENABLED" + diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 01446e4717..8904ccaa5d 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -94,8 +94,9 @@ arrow_53 = ["dep:arrow_53", "dep:parquet_53"] arrow_54 = ["dep:arrow_54", "dep:parquet_54"] -arrow-conversion = [] -arrow-expression = [] +need_arrow = [] +arrow-conversion = ["need_arrow"] +arrow-expression = ["need_arrow"] cloud = [ "object_store/aws", @@ -112,6 +113,7 @@ default-engine-base = [ "arrow-conversion", "arrow-expression", "futures", + "need_arrow", "object_store", "tokio", "uuid/v4", @@ -130,6 +132,7 @@ default-engine-rustls = [ developer-visibility = [] sync-engine = [ + "need_arrow", "tempfile", ] integration-test = [ diff --git a/kernel/src/arrow.rs b/kernel/src/arrow.rs index ccae93013f..b215727b18 100644 --- a/kernel/src/arrow.rs +++ b/kernel/src/arrow.rs @@ -1,11 +1,17 @@ //! This module exists to help re-export the version of arrow used by default-engine and other //! 
parts of kernel that need arrow -#[cfg(all(feature = "arrow_53", feature = "arrow_54"))] -compile_error!("Multiple versions of the arrow cannot be used at the same time!"); - #[cfg(feature = "arrow_53")] pub use arrow_53::*; -#[cfg(feature = "arrow_54")] +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] pub use arrow_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +pub use arrow_53::*; diff --git a/kernel/src/parquet.rs b/kernel/src/parquet.rs index bc7eba68f5..adff61c2ef 100644 --- a/kernel/src/parquet.rs +++ b/kernel/src/parquet.rs @@ -1,11 +1,17 @@ //! This module exists to help re-export the version of arrow used by default-engine and other //! parts of kernel that need arrow -#[cfg(all(feature = "arrow_53", feature = "arrow_54"))] -compile_error!("Multiple versions of the arrow cannot be used at the same time!"); - #[cfg(feature = "arrow_53")] pub use parquet_53::*; -#[cfg(feature = "arrow_54")] +#[cfg(all(feature = "arrow_54", not(feature = "arrow_53")))] pub use parquet_54::*; + +// if nothing is enabled but we need arrow because of some other feature flag, default to lowest +// supported version +#[cfg(all( + feature = "need_arrow", + not(feature = "arrow_53"), + not(feature = "arrow_54") +))] +pub use parquet_53::*; From e2245736c6c1f3be8f80b8bdf4548f036f16f5e2 Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 24 Feb 2025 18:15:11 -0800 Subject: [PATCH 15/38] fix: Make having `need_arrow` + no arrow a compiler error (#717) If we try and have a `need_arrow` flag we can make that include the code line: `pub use arrow_53::*` but we _cannot_ have it actually pull in the dependency. Pulling in the dependency is purely expressed in `Cargo.toml`, so the `use` just fails because we don't _have_ an arrow_53 dependency in that case. 
We can do some gross stuff in `build.rs` to inject the dependency, but even that doesn't apply to crates that depend on us so it only works if just compiling `delta-kernel` but isn't actually helpful for the use case we want. So I kept the `need-arrow` dep as a way for us to express that something needs arrow enabled, but rather than trying to do the import, it just issues a `compile_error` asking you to pick an arrow version. Perhaps we can get something more clever in the future, but for now let's unblock things. Also have test-utils depend on the arrow feature --- kernel/src/arrow.rs | 2 +- kernel/src/parquet.rs | 2 +- test-utils/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/src/arrow.rs b/kernel/src/arrow.rs index b215727b18..915e603af8 100644 --- a/kernel/src/arrow.rs +++ b/kernel/src/arrow.rs @@ -14,4 +14,4 @@ pub use arrow_54::*; not(feature = "arrow_53"), not(feature = "arrow_54") ))] -pub use arrow_53::*; +compile_error!("Requested a feature that needs arrow without enabling arrow. Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/kernel/src/parquet.rs b/kernel/src/parquet.rs index adff61c2ef..3620792901 100644 --- a/kernel/src/parquet.rs +++ b/kernel/src/parquet.rs @@ -14,4 +14,4 @@ pub use parquet_54::*; not(feature = "arrow_53"), not(feature = "arrow_54") ))] -pub use parquet_53::*; +compile_error!("Requested a feature that needs arrow without enabling arrow. 
Please enable the `arrow_53` or `arrow_54` feature"); diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index b602b2e68d..1c2f3a1d74 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -12,6 +12,6 @@ version.workspace = true release = false [dependencies] -delta_kernel = { path = "../kernel", features = [ "default-engine" ] } +delta_kernel = { path = "../kernel", features = [ "default-engine", "arrow" ] } itertools = "0.13.0" object_store = { workspace = true } From 08309bc971886a485d9365379067a3f4988f38a6 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 24 Feb 2025 18:26:44 -0800 Subject: [PATCH 16/38] Release 0.7.0 (#716) release 0.7.0 --- CHANGELOG.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 +- README.md | 4 +-- ffi/Cargo.toml | 2 +- kernel/Cargo.toml | 2 +- 5 files changed, 95 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d403a1fd4..49f89018e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,95 @@ # Changelog +## [v0.7.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.7.0/) (2025-02-24) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.1...v0.7.0) + +### 🏗️ Breaking changes +1. Read transforms are now communicated via expressions ([#607], [#612], [#613], [#614]) This includes: + - `ScanData` now includes a third tuple field: a row-indexed vector of transforms to apply to the `EngineData`. + - Adds a new `scan::state::transform_to_logical` function that encapsulates the boilerplate of applying the transform expression + - Removes `scan_action_iter` API and `logical_to_physical` API + - Removes `column_mapping_mode` from `GlobalScanState` + - ffi: exposes methods to get an expression evaluator and evaluate an expression from c + - read-table example: Removes `add_partition_columns` in arrow.c + - read-table example: adds an `apply_transform` function in arrow.c +2. ffi: support field nullability in schema visitor ([#656]) +3. 
ffi: expose metadata in SchemaEngineVisitor ffi api ([#659]) +4. ffi: new `visit_schema` FFI now operates on a `Schema` instead of a `Snapshot` ([#683], [#709]) +5. Introduced feature flags (`arrow_54` and `arrow_53`) to select major arrow versions ([#654], [#708], [#717]) + +### 🚀 Features / new APIs + +1. Read `partition_values` in `RemoveVisitor` and remove `break` in `RowVisitor` for `RemoveVisitor` ([#633]) +2. Add the in-commit timestamp field to CommitInfo ([#581]) +3. Support NOT and column expressions in eval_sql_where ([#653]) +4. Add check for schema read compatibility ([#554]) +5. Introduce `TableConfiguration` to jointly manage metadata, protocol, and table properties ([#644]) +6. Add visitor `SidecarVisitor` and `Sidecar` action struct ([#673]) +7. Add in-commit timestamps table properties ([#558]) +8. Support writing to writer version 1 ([#693]) +9. ffi: new `logical_schema` FFI to get the logical schema of a snapshot ([#709]) + +### 🐛 Bug Fixes + +1. Incomplete multi-part checkpoint handling when no hint is provided ([#641]) +2. Consistent PartialEq for Scalar ([#677]) +3. Cargo fmt does not handle mods defined in macros ([#676]) +4. Ensure properly nested null masks for parquet reads ([#692]) +5. Handle predicates on non-nullable columns without stats ([#700]) + +### 📚 Documentation + +1. Update readme to reflect tracing feature is needed for read-table ([#619]) +2. Clarify `JsonHandler` semantics on EngineData ordering ([#635]) + +### 🚜 Refactor + +1. Make [non] nullable struct fields easier to create ([#646]) +2. Make eval_sql_where available to DefaultPredicateEvaluator ([#627]) + +### 🧪 Testing + +1. Port cdf tests from delta-spark to kernel ([#611]) + +### ⚙️ Chores/CI + +1. Fix some typos ([#643]) +2. 
Release script publishing fixes ([#638]) + +[#638]: https://github.com/delta-io/delta-kernel-rs/pull/638 +[#643]: https://github.com/delta-io/delta-kernel-rs/pull/643 +[#619]: https://github.com/delta-io/delta-kernel-rs/pull/619 +[#635]: https://github.com/delta-io/delta-kernel-rs/pull/635 +[#633]: https://github.com/delta-io/delta-kernel-rs/pull/633 +[#611]: https://github.com/delta-io/delta-kernel-rs/pull/611 +[#581]: https://github.com/delta-io/delta-kernel-rs/pull/581 +[#646]: https://github.com/delta-io/delta-kernel-rs/pull/646 +[#627]: https://github.com/delta-io/delta-kernel-rs/pull/627 +[#641]: https://github.com/delta-io/delta-kernel-rs/pull/641 +[#653]: https://github.com/delta-io/delta-kernel-rs/pull/653 +[#607]: https://github.com/delta-io/delta-kernel-rs/pull/607 +[#656]: https://github.com/delta-io/delta-kernel-rs/pull/656 +[#554]: https://github.com/delta-io/delta-kernel-rs/pull/554 +[#644]: https://github.com/delta-io/delta-kernel-rs/pull/644 +[#659]: https://github.com/delta-io/delta-kernel-rs/pull/659 +[#612]: https://github.com/delta-io/delta-kernel-rs/pull/612 +[#677]: https://github.com/delta-io/delta-kernel-rs/pull/677 +[#676]: https://github.com/delta-io/delta-kernel-rs/pull/676 +[#673]: https://github.com/delta-io/delta-kernel-rs/pull/673 +[#613]: https://github.com/delta-io/delta-kernel-rs/pull/613 +[#558]: https://github.com/delta-io/delta-kernel-rs/pull/558 +[#692]: https://github.com/delta-io/delta-kernel-rs/pull/692 +[#700]: https://github.com/delta-io/delta-kernel-rs/pull/700 +[#683]: https://github.com/delta-io/delta-kernel-rs/pull/683 +[#654]: https://github.com/delta-io/delta-kernel-rs/pull/654 +[#693]: https://github.com/delta-io/delta-kernel-rs/pull/693 +[#614]: https://github.com/delta-io/delta-kernel-rs/pull/614 +[#709]: https://github.com/delta-io/delta-kernel-rs/pull/709 +[#708]: https://github.com/delta-io/delta-kernel-rs/pull/708 +[#717]: https://github.com/delta-io/delta-kernel-rs/pull/717 + + ## 
[v0.6.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.6.1/) (2025-01-10) [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.0...v0.6.1) diff --git a/Cargo.toml b/Cargo.toml index aec38fc78b..b857043109 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" rust-version = "1.80" -version = "0.6.1" +version = "0.7.0" [workspace.dependencies] object_store = { version = ">=0.11, <0.12" } diff --git a/README.md b/README.md index 23eff87700..1cddea1a8e 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default ```toml # fewer dependencies, requires consumer to implement Engine trait. # allows consumers to implement their own in-memory format -delta_kernel = "0.6.1" +delta_kernel = "0.7.0" # or turn on the default engine, based on arrow -delta_kernel = { version = "0.6.1", features = ["default-engine"] } +delta_kernel = { version = "0.7.0", features = ["default-engine"] } ``` ### Feature flags diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index d588427b00..fe0aa97eb8 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -25,7 +25,7 @@ delta_kernel = { path = "../kernel", default-features = false, features = [ "arrow", "developer-visibility", ] } -delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.1" } +delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.7.0" } [build-dependencies] cbindgen = "0.28" diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 8904ccaa5d..886ba3c60f 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -51,7 +51,7 @@ uuid = "1.10.0" z85 = "3.0.5" # bring in our derive macros -delta_kernel_derive = { path = "../derive-macros", version = "0.6.1" } +delta_kernel_derive = { path = "../derive-macros", version = "0.7.0" } # used for developer-visibility visibility = "0.1.1" From 
5c4d579884f4238860130d36030e5f680d388625 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Fri, 28 Feb 2025 18:02:04 -0500 Subject: [PATCH 17/38] pin chrono version to fix arrow compilation failure (#719) ## What changes are proposed in this pull request? Current chrono 0.4.40 breaks building arrow, pin chrono to a prior version that does not break arrow. ## How was this change tested? CI/CD --------- Co-authored-by: Zach Schuermann --- kernel/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index 886ba3c60f..b6c6afbb03 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -36,7 +36,7 @@ pre-release-hook = [ [dependencies] bytes = "1.7" -chrono = { version = "0.4" } +chrono = "=0.4.39" fix-hidden-lifetime-bug = "0.2" indexmap = "2.5.0" itertools = "0.13" From 51553f2bea336424a45f71c2b2efac8bb618403a Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Mon, 3 Mar 2025 12:29:41 -0700 Subject: [PATCH 18/38] fix!(ffi): Visit decimals as signed values (#724) ## What changes are proposed in this pull request? The FFI expression visitor code incorrectly passes a `(u64, u64)` pair to `visit_literal_decimal` callback, representing the upper and lower half of an `i128` decimal value. It should actually be `(i64, u64)` to preserve signedness. ### This PR affects the following public APIs The expression visitor callback `visit_literal_decimal` now takes `i64` for the upper half of a 128-bit int value. ## How was this change tested? Updated the example code. 
--- ffi/examples/visit-expression/expression.h | 9 +++++---- ffi/examples/visit-expression/expression_print.h | 6 +++--- ffi/src/expressions/kernel.rs | 8 +++----- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/ffi/examples/visit-expression/expression.h b/ffi/examples/visit-expression/expression.h index eee88d1dcb..f668860c5f 100644 --- a/ffi/examples/visit-expression/expression.h +++ b/ffi/examples/visit-expression/expression.h @@ -87,7 +87,8 @@ struct BinaryData { uintptr_t len; }; struct Decimal { - uint64_t value[2]; + int64_t hi; + uint64_t lo; uint8_t precision; uint8_t scale; }; @@ -202,15 +203,15 @@ void visit_expr_string_literal(void* data, uintptr_t sibling_list_id, KernelStri } void visit_expr_decimal_literal(void* data, uintptr_t sibling_list_id, - uint64_t value_ms, + int64_t value_ms, uint64_t value_ls, uint8_t precision, uint8_t scale) { struct Literal* literal = malloc(sizeof(struct Literal)); literal->type = Decimal; struct Decimal* dec = &literal->value.decimal; - dec->value[0] = value_ms; - dec->value[1] = value_ls; + dec->hi = value_ms; + dec->lo = value_ls; dec->precision = precision; dec->scale = scale; put_expr_item(data, sibling_list_id, literal, Literal); diff --git a/ffi/examples/visit-expression/expression_print.h b/ffi/examples/visit-expression/expression_print.h index 7507c8de07..0b36c9de7b 100644 --- a/ffi/examples/visit-expression/expression_print.h +++ b/ffi/examples/visit-expression/expression_print.h @@ -144,9 +144,9 @@ void print_tree_helper(ExpressionItem ref, int depth) { } case Decimal: { struct Decimal* dec = &lit->value.decimal; - printf("Decimal(%lld,%lld,%d,%d)\n", - (long long)dec->value[0], - (long long)dec->value[1], + printf("Decimal(%lld,%llu,%d,%d)\n", + (long long)dec->hi, + (unsigned long long)dec->lo, dec->precision, dec->scale); break; diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs index a2a1dcd1f6..32db7db58f 100644 --- a/ffi/src/expressions/kernel.rs +++ 
b/ffi/src/expressions/kernel.rs @@ -96,7 +96,7 @@ pub struct EngineExpressionVisitor { pub visit_literal_decimal: extern "C" fn( data: *mut c_void, sibling_list_id: usize, - value_ms: u64, + value_ms: i64, value_ls: u64, precision: u8, scale: u8, @@ -318,14 +318,12 @@ pub fn visit_expression_internal( buf.len() ), Scalar::Decimal(value, precision, scale) => { - let ms: u64 = (value >> 64) as u64; - let ls: u64 = *value as u64; call!( visitor, visit_literal_decimal, sibling_list_id, - ms, - ls, + (value >> 64) as i64, + *value as u64, *precision, *scale ) From bf97a244214a71788f3426c56fbbd56408d5483c Mon Sep 17 00:00:00 2001 From: Nick Lanham Date: Mon, 3 Mar 2025 11:43:20 -0800 Subject: [PATCH 19/38] feat!(ffi): Make get_partition_column* work on a snapshot. (#697) Make `get_partition_column_count` and `get_partition_columns` take a snapshot so engines can work this out at planning time without creating a scan. The previous methods to get this info out of a scan have been removed. The old functions that took snapshots have been removed New unit test --- ffi/examples/read-table/read_table.c | 9 ++-- ffi/src/lib.rs | 65 ++++++++++++++++++++++++++-- ffi/src/scan.rs | 26 +---------- kernel/src/engine/mod.rs | 4 +- test-utils/src/lib.rs | 32 ++++++++++---- 5 files changed, 94 insertions(+), 42 deletions(-) diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c index 0ddc20ded4..d24be5a6bd 100644 --- a/ffi/examples/read-table/read_table.c +++ b/ffi/examples/read-table/read_table.c @@ -113,15 +113,15 @@ void visit_partition(void* context, const KernelStringSlice partition) } // Build a list of partition column names. 
-PartitionList* get_partition_list(SharedGlobalScanState* state) +PartitionList* get_partition_list(SharedSnapshot* snapshot) { print_diag("Building list of partition columns\n"); - uintptr_t count = get_partition_column_count(state); + uintptr_t count = get_partition_column_count(snapshot); PartitionList* list = malloc(sizeof(PartitionList)); // We set the `len` to 0 here and use it to track how many items we've added to the list list->len = 0; list->cols = malloc(sizeof(char*) * count); - StringSliceIterator* part_iter = get_partition_columns(state); + StringSliceIterator* part_iter = get_partition_columns(snapshot); for (;;) { bool has_next = string_slice_next(part_iter, list, visit_partition); if (!has_next) { @@ -264,6 +264,8 @@ int main(int argc, char* argv[]) char* table_root = snapshot_table_root(snapshot, allocate_string); print_diag("Table root: %s\n", table_root); + PartitionList* partition_cols = get_partition_list(snapshot); + print_diag("Starting table scan\n\n"); ExternResultHandleSharedScan scan_res = scan(snapshot, engine, NULL); @@ -276,7 +278,6 @@ int main(int argc, char* argv[]) SharedGlobalScanState* global_state = get_global_scan_state(scan); SharedSchema* logical_schema = get_global_logical_schema(global_state); SharedSchema* read_schema = get_global_read_schema(global_state); - PartitionList* partition_cols = get_partition_list(global_state); struct EngineContext context = { global_state, logical_schema, diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index 6c21e01574..a7725db29a 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -636,7 +636,7 @@ pub unsafe extern "C" fn free_schema(schema: Handle) { /// /// # Safety /// -/// Caller is responsible for passing a valid handle. +/// Caller is responsible for passing a valid snapshot handle. 
#[no_mangle] pub unsafe extern "C" fn snapshot_table_root( snapshot: Handle, @@ -647,6 +647,29 @@ pub unsafe extern "C" fn snapshot_table_root( allocate_fn(kernel_string_slice!(table_root)) } +/// Get a count of the number of partition columns for this snapshot +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle +#[no_mangle] +pub unsafe extern "C" fn get_partition_column_count(snapshot: Handle) -> usize { + let snapshot = unsafe { snapshot.as_ref() }; + snapshot.metadata().partition_columns.len() +} + +/// Get an iterator of the list of partition columns for this snapshot. +/// +/// # Safety +/// Caller is responsible for passing a valid snapshot handle. +#[no_mangle] +pub unsafe extern "C" fn get_partition_columns( + snapshot: Handle, +) -> Handle { + let snapshot = unsafe { snapshot.as_ref() }; + let iter: Box = Box::new(snapshot.metadata().partition_columns.clone().into_iter()); + iter.into() +} + type StringIter = dyn Iterator + Send; #[handle_descriptor(target=StringIter, mutable=true, sized=false)] @@ -742,8 +765,8 @@ impl Default for ReferenceSet { #[cfg(test)] mod tests { use delta_kernel::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; - use object_store::memory::InMemory; - use test_utils::{actions_to_string, add_commit, TestAction}; + use object_store::{memory::InMemory, path::Path}; + use test_utils::{actions_to_string, actions_to_string_partitioned, add_commit, TestAction}; use super::*; use crate::error::{EngineError, KernelError}; @@ -836,6 +859,42 @@ mod tests { Ok(()) } + #[tokio::test] + async fn test_snapshot_partition_cols() -> Result<(), Box> { + let storage = Arc::new(InMemory::new()); + add_commit( + storage.as_ref(), + 0, + actions_to_string_partitioned(vec![TestAction::Metadata]), + ) + .await?; + let engine = DefaultEngine::new(storage.clone(), Arc::new(TokioBackgroundExecutor::new())); + let engine = engine_to_handle(Arc::new(engine), allocate_err); + let path = "memory:///"; + + 
let snapshot = + unsafe { ok_or_panic(snapshot(kernel_string_slice!(path), engine.shallow_copy())) }; + + let partition_count = unsafe { get_partition_column_count(snapshot.shallow_copy()) }; + assert_eq!(partition_count, 1, "Should have one partition"); + + let partition_iter = unsafe { get_partition_columns(snapshot.shallow_copy()) }; + + #[no_mangle] + extern "C" fn visit_partition(_context: NullableCvoid, slice: KernelStringSlice) { + let s = unsafe { String::try_from_slice(&slice) }.unwrap(); + assert_eq!(s.as_str(), "val", "Partition col should be 'val'"); + } + while unsafe { string_slice_next(partition_iter.shallow_copy(), None, visit_partition) } { + // validate happens inside visit_partition + } + + unsafe { free_string_slice_data(partition_iter) } + unsafe { free_snapshot(snapshot) } + unsafe { free_engine(engine) } + Ok(()) + } + #[test] #[cfg(feature = "sync-engine")] fn sync_engine() { diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs index 693c4b3979..367817787a 100644 --- a/ffi/src/scan.rs +++ b/ffi/src/scan.rs @@ -18,8 +18,7 @@ use crate::expressions::SharedExpression; use crate::{ kernel_string_slice, AllocateStringFn, ExclusiveEngineData, ExternEngine, ExternResult, IntoExternResult, KernelBoolSlice, KernelRowIndexArray, KernelStringSlice, NullableCvoid, - SharedExternEngine, SharedSchema, SharedSnapshot, StringIter, StringSliceIterator, - TryFromStringSlice, + SharedExternEngine, SharedSchema, SharedSnapshot, TryFromStringSlice, }; use super::handle::Handle; @@ -111,29 +110,6 @@ pub unsafe extern "C" fn get_global_logical_schema( state.logical_schema.clone().into() } -/// Get a count of the number of partition columns for this scan -/// -/// # Safety -/// Caller is responsible for passing a valid global scan pointer. 
-#[no_mangle] -pub unsafe extern "C" fn get_partition_column_count(state: Handle) -> usize { - let state = unsafe { state.as_ref() }; - state.partition_columns.len() -} - -/// Get an iterator of the list of partition columns for this scan. -/// -/// # Safety -/// Caller is responsible for passing a valid global scan pointer. -#[no_mangle] -pub unsafe extern "C" fn get_partition_columns( - state: Handle, -) -> Handle { - let state = unsafe { state.as_ref() }; - let iter: Box = Box::new(state.partition_columns.clone().into_iter()); - iter.into() -} - /// # Safety /// /// Caller is responsible for passing a valid global scan state pointer. diff --git a/kernel/src/engine/mod.rs b/kernel/src/engine/mod.rs index e962ee5a3f..f62da336f6 100644 --- a/kernel/src/engine/mod.rs +++ b/kernel/src/engine/mod.rs @@ -30,13 +30,13 @@ pub mod parquet_row_group_skipping; #[cfg(test)] mod tests { - use arrow_array::{RecordBatch, StringArray}; - use arrow_schema::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use itertools::Itertools; use object_store::path::Path; use std::sync::Arc; use url::Url; + use crate::arrow::array::{RecordBatch, StringArray}; + use crate::arrow::datatypes::{DataType as ArrowDataType, Field, Schema as ArrowSchema}; use crate::engine::arrow_data::ArrowEngineData; use crate::{Engine, EngineData}; diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index e8747c539a..a8b7c6610a 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -16,22 +16,38 @@ pub const METADATA: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operatio {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 
{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{},"createdTime":1587968585495}}"#; +/// A common useful initial metadata and protocol. Also includes a single commitInfo +pub const METADATA_WITH_PARTITION_COLS: &str = r#"{"commitInfo":{"timestamp":1587968586154,"operation":"WRITE","operationParameters":{"mode":"ErrorIfExists","partitionBy":"[]"},"isBlindAppend":true}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} +{"metaData":{"id":"5fba94ed-9794-4965-ba6e-6ee3c0d22af9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"val\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["val"],"configuration":{},"createdTime":1587968585495}}"#; + pub enum TestAction { Add(String), Remove(String), Metadata, } -/// Convert a vector of actions into a newline delimited json string +// TODO: We need a better way to mock tables :) + +/// Convert a vector of actions into a newline delimited json string, with standard metadata pub fn actions_to_string(actions: Vec) -> String { + actions_to_string_with_metadata(actions, METADATA) +} + +/// Convert a vector of actions into a newline delimited json string, with metadata including a partition column +pub fn actions_to_string_partitioned(actions: Vec) -> String { + actions_to_string_with_metadata(actions, METADATA_WITH_PARTITION_COLS) +} + +fn actions_to_string_with_metadata(actions: Vec, metadata: &str) -> String { actions - .into_iter() - .map(|test_action| match test_action { - TestAction::Add(path) => 
format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#), - TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#), - TestAction::Metadata => METADATA.into(), - }) - .join("\n") + .into_iter() + .map(|test_action| match test_action { + TestAction::Add(path) => format!(r#"{{"add":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true, "stats":"{{\"numRecords\":2,\"nullCount\":{{\"id\":0}},\"minValues\":{{\"id\": 1}},\"maxValues\":{{\"id\":3}}}}"}}}}"#), + TestAction::Remove(path) => format!(r#"{{"remove":{{"path":"{path}","partitionValues":{{}},"size":262,"modificationTime":1587968586000,"dataChange":true}}}}"#), + TestAction::Metadata => metadata.into(), + }) + .join("\n") } /// convert a RecordBatch into a vector of bytes. We can't use `From` since these are both foreign From 945ff1c7cb9e170371f4fd56e4b5f62a41c4bc7e Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 3 Mar 2025 13:46:42 -0800 Subject: [PATCH 20/38] fix!: bump MSRV to 1.81 (#725) ## What changes are proposed in this pull request? `Url` crate now has MSRV with default unicode backend of rustc `1.81`. instead of fighting this, we will just bump up our MSRV from `1.80` to `1.81` seeing as (1) a large part of the ecosystem (datafusion, polars (though still states `1.80` in README), delta-rs, etc.) already is on `1.81` and (2) `Url` is a rather foundational crate so if they bump it seems reasonable to assume that many consumers will too ### This PR affects the following public APIs bumping MSRV from `1.80` to `1.81` ## How was this change tested? 
MSRV test --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index b857043109..aa75b3765e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ keywords = ["deltalake", "delta", "datalake"] license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" -rust-version = "1.80" +rust-version = "1.81" version = "0.7.0" [workspace.dependencies] From 9b1a91f5f3be78c198426b258416cc068fcc7957 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 3 Mar 2025 15:52:31 -0800 Subject: [PATCH 21/38] perf!: replace default engine JSON reader's `FileStream` with concurrent futures (#711) ## What changes are proposed in this pull request? The original `FileStream` API, though intended to concurrently make GET requests to the object store, actually made serial requests and relied on a hand-written poll function in order to implement `Stream`. This PR aims to make a minimal change in order to (1) increase performance for the JSON reader by issuing concurrent GET requests and (2) simplify the code by removing the need for a custom `Stream` and instead leverage existing functions/adapters to convert the files to read into a `Stream` and issue concurrent requests through the [`futures::stream::buffered`](https://docs.rs/futures/latest/futures/stream/struct.Buffered.html) adapter. This is effectively a similar improvement as in #595 but for the JSON reader. ### Specifically, the changes are: 1. replace the `FileStream::new_async_read_iterator()` call (the manually-implemented `Stream`) with an inline implementation of converting the files slice into a Stream (via `stream::iter`) and use the [`futures::stream::buffered`](https://docs.rs/futures/latest/futures/stream/struct.Buffered.html) adapter to concurrently execute file opening futures. It then sends results across an `mpsc` channel to bridge the async/sync gap. 2. 
JsonOpener no longer implements `FileOpener` (which requires a synchronous `fn open()` and instead directly exposes an `async fn open()` for easier/simpler use above. This removes all reliance on `FileStream`/`FileOpener` in the JSON reader. 3. adds a custom `ObjectStore` implementation: `OrderedGetStore` to deterministically control the ordering in which GET request futures are resolved ### This PR affects the following public APIs - `DefaultJsonHandler::with_readahead()` renamed to `DefaultJsonHandler::with_buffer_size()` - DefaultJsonHandler's default buffer size: 10 => 1000 - DefaultJsonHandler's default batch size: 1024 => 1000 ## How was this change tested? added test with a new `OrderedGetStore` which will resolve the GET requests in a jumbled order but we expect the test to return the natural order of requests. in a additionally, manually validated that we went from serial JSON file reads to concurrent reads --- feature-tests/src/lib.rs | 5 +- kernel/Cargo.toml | 1 + kernel/src/engine/default/json.rs | 598 +++++++++++++++++++++++++----- 3 files changed, 517 insertions(+), 87 deletions(-) diff --git a/feature-tests/src/lib.rs b/feature-tests/src/lib.rs index a421d86f99..6a07429f15 100644 --- a/feature-tests/src/lib.rs +++ b/feature-tests/src/lib.rs @@ -1,7 +1,10 @@ /// This is a compilation test to ensure that the default-engine feature flags are working -/// correctly. Run (from workspace root) with: +/// correctly. +/// +/// Run (from workspace root) with: /// 1. `cargo b -p feature_tests --features default-engine-rustls` /// 2. `cargo b -p feature_tests --features default-engine` +/// /// These run in our build CI. 
pub fn test_default_engine_feature_flags() { #[cfg(any(feature = "default-engine", feature = "default-engine-rustls"))] diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index b6c6afbb03..b064e3a9ad 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -150,6 +150,7 @@ rustc_version = "0.4.1" [dev-dependencies] delta_kernel = { path = ".", features = ["arrow", "default-engine", "sync-engine"] } test_utils = { path = "../test-utils" } +async-trait = "0.1" # only used for our custom SlowGetStore ObjectStore implementation paste = "1.0" test-log = { version = "0.2", default-features = false, features = ["trace"] } tempfile = "3" diff --git a/kernel/src/engine/default/json.rs b/kernel/src/engine/default/json.rs index bef3b30cd4..3e0173f956 100644 --- a/kernel/src/engine/default/json.rs +++ b/kernel/src/engine/default/json.rs @@ -2,19 +2,22 @@ use std::io::BufReader; use std::ops::Range; -use std::sync::Arc; -use std::task::{ready, Poll}; +use std::sync::{mpsc, Arc}; +use std::task::Poll; use crate::arrow::datatypes::SchemaRef as ArrowSchemaRef; use crate::arrow::json::ReaderBuilder; +use crate::arrow::record_batch::RecordBatch; use bytes::{Buf, Bytes}; -use futures::{StreamExt, TryStreamExt}; +use futures::stream::{self, BoxStream}; +use futures::{ready, StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{DynObjectStore, GetResultPayload}; +use tracing::warn; use url::Url; use super::executor::TaskExecutor; -use super::file_stream::{FileOpenFuture, FileOpener, FileStream}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::arrow_utils::parse_json as arrow_parse_json; use crate::engine::arrow_utils::to_json_bytes; use crate::schema::SchemaRef; @@ -23,15 +26,21 @@ use crate::{ JsonHandler, }; +const DEFAULT_BUFFER_SIZE: usize = 1000; +const DEFAULT_BATCH_SIZE: usize = 1000; + #[derive(Debug)] pub struct DefaultJsonHandler { /// The object store to read files from store: Arc, /// The executor to run async tasks on 
task_executor: Arc, - /// The maximum number of batches to read ahead - readahead: usize, - /// The number of rows to read per batch + /// The maximum number of read requests to buffer in memory at once. Note that this actually + /// controls two things: the number of concurrent requests (done by `buffered`) and the size of + /// the buffer (via our `sync_channel`). + buffer_size: usize, + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. batch_size: usize, } @@ -40,22 +49,34 @@ impl DefaultJsonHandler { Self { store, task_executor, - readahead: 10, - batch_size: 1024, + buffer_size: DEFAULT_BUFFER_SIZE, + batch_size: DEFAULT_BATCH_SIZE, } } - /// Set the maximum number of batches to read ahead during [Self::read_json_files()]. + /// Set the maximum number read requests to buffer in memory at once in + /// [Self::read_json_files()]. + /// + /// Defaults to 1000. /// - /// Defaults to 10. - pub fn with_readahead(mut self, readahead: usize) -> Self { - self.readahead = readahead; + /// Memory constraints can be imposed by constraining the buffer size and batch size. Note that + /// overall memory usage is proportional to the product of these two values. + /// 1. Batch size governs the size of RecordBatches yielded in each iteration of the stream + /// 2. Buffer size governs the number of concurrent tasks (which equals the size of the buffer + pub fn with_buffer_size(mut self, buffer_size: usize) -> Self { + self.buffer_size = buffer_size; self } - /// Set the number of rows to read per batch during [Self::parse_json()]. + /// Limit the number of rows per batch. That is, for batch_size = N, then each RecordBatch + /// yielded by the stream will have at most N rows. + /// + /// Defaults to 1000 rows (json objects). + /// + /// See [Decoder::with_buffer_size] for details on constraining memory usage with buffer size + /// and batch size. /// - /// Defaults to 1024. 
+ /// [Decoder::with_buffer_size]: crate::arrow::json::reader::Decoder pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; self @@ -83,13 +104,32 @@ impl JsonHandler for DefaultJsonHandler { let schema: ArrowSchemaRef = Arc::new(physical_schema.as_ref().try_into()?); let file_opener = JsonOpener::new(self.batch_size, schema.clone(), self.store.clone()); - FileStream::new_async_read_iterator( - self.task_executor.clone(), - schema, - Box::new(file_opener), - files, - self.readahead, - ) + + let (tx, rx) = mpsc::sync_channel(self.buffer_size); + let files = files.to_vec(); + let buffer_size = self.buffer_size; + + self.task_executor.spawn(async move { + // an iterator of futures that open each file + let file_futures = files.into_iter().map(|file| file_opener.open(file, None)); + + // create a stream from that iterator which buffers up to `buffer_size` futures at a time + let mut stream = stream::iter(file_futures) + .buffered(buffer_size) + .try_flatten() + .map_ok(|record_batch| -> Box { + Box::new(ArrowEngineData::new(record_batch)) + }); + + // send each record batch over the channel + while let Some(item) = stream.next().await { + if tx.send(item).is_err() { + warn!("read_json receiver end of channel dropped before sending completed"); + } + } + }); + + Ok(Box::new(rx.into_iter())) } // note: for now we just buffer all the data and write it out all at once @@ -118,7 +158,7 @@ impl JsonHandler for DefaultJsonHandler { } } -/// A [`FileOpener`] that opens a JSON file and yields a [`FileOpenFuture`] +/// Opens JSON files and returns a stream of record batches #[allow(missing_debug_implementations)] pub struct JsonOpener { batch_size: usize, @@ -127,90 +167,309 @@ pub struct JsonOpener { } impl JsonOpener { - /// Returns a [`JsonOpener`] + /// Returns a [`JsonOpener`] pub fn new( batch_size: usize, projected_schema: ArrowSchemaRef, - // file_compression_type: FileCompressionType, object_store: Arc, ) -> Self { Self { 
batch_size, projected_schema, - // file_compression_type, object_store, } } } -impl FileOpener for JsonOpener { - fn open(&self, file_meta: FileMeta, _: Option>) -> DeltaResult { +impl JsonOpener { + pub async fn open( + &self, + file_meta: FileMeta, + _: Option>, + ) -> DeltaResult>> { let store = self.object_store.clone(); let schema = self.projected_schema.clone(); let batch_size = self.batch_size; - Ok(Box::pin(async move { - let path = Path::from_url_path(file_meta.location.path())?; - match store.get(&path).await?.payload { - GetResultPayload::File(file, _) => { - let reader = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build(BufReader::new(file))?; - Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) - } - GetResultPayload::Stream(s) => { - let mut decoder = ReaderBuilder::new(schema) - .with_batch_size(batch_size) - .build_decoder()?; - - let mut input = s.map_err(Error::from); - let mut buffered = Bytes::new(); - - let s = futures::stream::poll_fn(move |cx| { - loop { - if buffered.is_empty() { - buffered = match ready!(input.poll_next_unpin(cx)) { - Some(Ok(b)) => b, - Some(Err(e)) => return Poll::Ready(Some(Err(e))), - None => break, - }; - } - let read = buffered.len(); - - let decoded = match decoder.decode(buffered.as_ref()) { - Ok(decoded) => decoded, - Err(e) => return Poll::Ready(Some(Err(e.into()))), + let path = Path::from_url_path(file_meta.location.path())?; + match store.get(&path).await?.payload { + GetResultPayload::File(file, _) => { + let reader = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build(BufReader::new(file))?; + Ok(futures::stream::iter(reader).map_err(Error::from).boxed()) + } + GetResultPayload::Stream(s) => { + let mut decoder = ReaderBuilder::new(schema) + .with_batch_size(batch_size) + .build_decoder()?; + + let mut input = s.map_err(Error::from); + let mut buffered = Bytes::new(); + + let s = futures::stream::poll_fn(move |cx| { + loop { + if buffered.is_empty() { + buffered = 
match ready!(input.poll_next_unpin(cx)) { + Some(Ok(b)) => b, + Some(Err(e)) => return Poll::Ready(Some(Err(e))), + None => break, }; - - buffered.advance(decoded); - if decoded != read { - break; - } } + let read = buffered.len(); + + // NB (from Decoder::decode docs): + // Read JSON objects from `buf` (param), returning the number of bytes read + // + // This method returns once `batch_size` objects have been parsed since the + // last call to [`Self::flush`], or `buf` is exhausted. Any remaining bytes + // should be included in the next call to [`Self::decode`] + let decoded = match decoder.decode(buffered.as_ref()) { + Ok(decoded) => decoded, + Err(e) => return Poll::Ready(Some(Err(e.into()))), + }; + + buffered.advance(decoded); + if decoded != read { + break; + } + } - Poll::Ready(decoder.flush().map_err(Error::from).transpose()) - }); - Ok(s.map_err(Error::from).boxed()) - } + Poll::Ready(decoder.flush().map_err(Error::from).transpose()) + }); + Ok(s.map_err(Error::from).boxed()) } - })) + } } } #[cfg(test)] mod tests { + use std::collections::{HashMap, HashSet, VecDeque}; use std::path::PathBuf; + use std::sync::{mpsc, Arc, Mutex}; + use std::task::Waker; - use crate::arrow::array::{AsArray, RecordBatch, StringArray}; + use crate::actions::get_log_schema; + use crate::arrow::array::{AsArray, Int32Array, RecordBatch, StringArray}; use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use crate::engine::arrow_data::ArrowEngineData; + use crate::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, + }; + use futures::future; use itertools::Itertools; - use object_store::{local::LocalFileSystem, ObjectStore}; + use object_store::local::LocalFileSystem; + use object_store::memory::InMemory; + use object_store::{ + GetOptions, GetResult, ListResult, MultipartUpload, ObjectMeta, ObjectStore, + PutMultipartOpts, PutOptions, PutPayload, PutResult, Result, + }; + + // TODO: should just use the one from 
test_utils, but running into dependency issues + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } use super::*; - use crate::{ - actions::get_log_schema, engine::arrow_data::ArrowEngineData, - engine::default::executor::tokio::TokioBackgroundExecutor, - }; + + /// Store wrapper that wraps an inner store to guarantee the ordering of GET requests. Note + /// that since the keys are resolved in order, requests to subsequent keys in the order will + /// block until the earlier keys are requested. + /// + /// WARN: Does not handle duplicate keys, and will fail on duplicate requests of the same key. + /// + // TODO(zach): we can handle duplicate requests if we retain the ordering of the keys track + // that all of the keys prior to the one requested have been resolved. + #[derive(Debug)] + struct OrderedGetStore { + // The ObjectStore we are wrapping + inner: T, + // Combined state: queue and wakers, protected by a single mutex + state: Mutex, + } + + #[derive(Debug)] + struct KeysAndWakers { + // Queue of paths in order which they will resolve + ordered_keys: VecDeque, + // Map of paths to wakers for pending get requests + wakers: HashMap, + } + + impl OrderedGetStore { + fn new(inner: T, ordered_keys: &[Path]) -> Self { + let ordered_keys = ordered_keys.to_vec(); + // Check for duplicates + let mut seen = HashSet::new(); + for key in ordered_keys.iter() { + if !seen.insert(key) { + panic!("Duplicate key in OrderedGetStore: {}", key); + } + } + + let state = KeysAndWakers { + ordered_keys: ordered_keys.into(), + wakers: HashMap::new(), + }; + + Self { + inner, + state: Mutex::new(state), + } + } + } + + impl std::fmt::Display for OrderedGetStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let state = self.state.lock().unwrap(); + write!(f, "OrderedGetStore({:?})", state.ordered_keys) + } + } + + #[async_trait::async_trait] + impl ObjectStore for 
OrderedGetStore { + async fn put(&self, location: &Path, payload: PutPayload) -> Result { + self.inner.put(location, payload).await + } + + async fn put_opts( + &self, + location: &Path, + payload: PutPayload, + opts: PutOptions, + ) -> Result { + self.inner.put_opts(location, payload, opts).await + } + + async fn put_multipart(&self, location: &Path) -> Result> { + self.inner.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + opts: PutMultipartOpts, + ) -> Result> { + self.inner.put_multipart_opts(location, opts).await + } + + // A GET request is fulfilled by checking if the requested path is next in order: + // - if yes, remove the path from the queue and proceed with the GET request, then wake the + // next path in order + // - if no, register the waker and wait + async fn get(&self, location: &Path) -> Result { + // Do the actual GET request first, then introduce any artificial ordering delays as needed + let result = self.inner.get(location).await; + + // we implement a future which only resolves once the requested path is next in order + future::poll_fn(move |cx| { + let mut state = self.state.lock().unwrap(); + let Some(next_key) = state.ordered_keys.front() else { + panic!("Ran out of keys before {location}"); + }; + if next_key == location { + // We are next in line. Nobody else can remove our key, and our successor + // cannot race with us to register itself because we hold the lock. + // + // first, remove our key from the queue. + // + // note: safe to unwrap because we just checked that the front key exists (and + // is the same as our requested location) + state.ordered_keys.pop_front().unwrap(); + + // there are three possible cases, either: + // 1. the key has already been requested, hence there is a waker waiting, and we + // need to wake it up + // 2. 
the next key has no waker registered, in which case we do nothing, and + // whenever the request for said key is made, it will either be next in line + // or a waker will be registered - either case ensuring that the request is + // completed + // 3. the next key is the last key in the queue, in which case there is nothing + // left to do (no need to wake anyone) + if let Some(next_key) = state.ordered_keys.front().cloned() { + if let Some(waker) = state.wakers.remove(&next_key) { + waker.wake(); // NOTE: Not async, returns instantly. + } + } + Poll::Ready(()) + } else { + // We are not next in line, so wait on our key. Nobody can race to remove it + // because we own it; nobody can race to wake us because we hold the lock. + if state + .wakers + .insert(location.clone(), cx.waker().clone()) + .is_some() + { + panic!("Somebody else is already waiting on {location}"); + } + Poll::Pending + } + }) + .await; + + // When we return this result, the future succeeds instantly. Any pending wake() call + // will not be processed before the next time we yield -- unless our executor is + // multi-threaded and happens to have another thread available. In that case, the + // serialization point is the moment our next-key poll_fn issues the wake call (or + // proves no wake is needed). 
+ result + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> Result { + self.inner.get_opts(location, options).await + } + + async fn get_range(&self, location: &Path, range: Range) -> Result { + self.inner.get_range(location, range).await + } + + async fn get_ranges(&self, location: &Path, ranges: &[Range]) -> Result> { + self.inner.get_ranges(location, ranges).await + } + + async fn head(&self, location: &Path) -> Result { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> Result<()> { + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, Result> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy(from, to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> { + self.inner.rename_if_not_exists(from, to).await + } + } fn string_array_to_engine_data(string_array: StringArray) -> Box { let string_field = Arc::new(Field::new("a", DataType::Utf8, true)); @@ -273,7 +532,7 @@ mod tests { "./tests/data/table-with-dv-small/_delta_log/00000000000000000000.json", )) .unwrap(); - let url = url::Url::from_file_path(path).unwrap(); + let url = Url::from_file_path(path).unwrap(); let location = Path::from(url.path()); let meta = store.head(&location).await.unwrap(); @@ -286,21 +545,188 @@ mod tests { let handler = DefaultJsonHandler::new(store, 
Arc::new(TokioBackgroundExecutor::new())); let physical_schema = Arc::new(ArrowSchema::try_from(get_log_schema().as_ref()).unwrap()); let data: Vec = handler - .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .read_json_files(files, get_log_schema().clone(), None) .unwrap() - .map(|ed_res| { - // TODO(nick) make this easier - ed_res.and_then(|ed| { - ed.into_any() - .downcast::() - .map_err(|_| Error::engine_data_type("ArrowEngineData")) - .map(|sd| sd.into()) - }) - }) + .map_ok(into_record_batch) .try_collect() .unwrap(); assert_eq!(data.len(), 1); assert_eq!(data[0].num_rows(), 4); + + // limit batch size + let handler = handler.with_batch_size(2); + let data: Vec = handler + .read_json_files(files, Arc::new(physical_schema.try_into().unwrap()), None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + assert_eq!(data.len(), 2); + assert_eq!(data[0].num_rows(), 2); + assert_eq!(data[1].num_rows(), 2); + } + + #[tokio::test] + async fn test_ordered_get_store() { + // note we don't want to go over 1000 since we only buffer 1000 requests at a time + let num_paths = 1000; + let ordered_paths: Vec = (0..num_paths) + .map(|i| Path::from(format!("/test/path{}", i))) + .collect(); + let jumbled_paths: Vec<_> = ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(); + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("content_{}", i)).into()) + .await + .unwrap(); + } + + // Create ordered store with natural order (0, 1, 2, ...) + let ordered_store = Arc::new(OrderedGetStore::new(memory_store, &ordered_paths)); + + let (tx, rx) = mpsc::channel(); + + // Spawn tasks to GET each path in our somewhat jumbled order + // They should complete in order (0, 1, 2, ...) 
due to OrderedGetStore + let handles = jumbled_paths.into_iter().map(|path| { + let store = ordered_store.clone(); + let tx = tx.clone(); + tokio::spawn(async move { + let _ = store.get(&path).await.unwrap(); + tx.send(path).unwrap(); + }) + }); + + // TODO(zach): we need to join all the handles otherwise none of the tasks run? despite the + // docs? + future::join_all(handles).await; + drop(tx); + + // NB (from mpsc::Receiver::recv): This function will always block the current thread if + // there is no data available and it's possible for more data to be sent (at least one + // sender still exists). + let mut completed = Vec::new(); + while let Ok(path) = rx.recv() { + completed.push(path); + } + + assert_eq!( + completed, + ordered_paths.into_iter().collect_vec(), + "Expected paths to complete in order" + ); + } + + #[tokio::test(flavor = "multi_thread", worker_threads = 3)] + async fn test_read_json_files_ordering() { + // this test checks that the read_json_files method returns the files in order in the + // presence of an ObjectStore (OrderedGetStore) that resolves paths in a jumbled order: + // 1. we set up a list of FileMetas (and some random JSON content) in order + // 2. we then set up an ObjectStore to resolves those paths in a jumbled order + // 3. 
then call read_json_files and check that the results are in order + let ordered_paths: Vec = (0..1000) + .map(|i| Path::from(format!("test/path{}", i))) + .collect(); + + let test_list: &[(usize, Vec)] = &[ + // test 1: buffer_size = 1000, just 1000 jumbled paths + ( + 1000, // buffer_size + ordered_paths[100..400] + .iter() + .chain(ordered_paths[400..].iter().rev()) + .chain(ordered_paths[..100].iter()) + .cloned() + .collect(), + ), + // test 2: buffer_size = 4, jumbled paths in groups of 4 + ( + 4, // buffer_size + (0..250) + .flat_map(|i| { + [ + ordered_paths[1 + 4 * i].clone(), + ordered_paths[4 * i].clone(), + ordered_paths[3 + 4 * i].clone(), + ordered_paths[2 + 4 * i].clone(), + ] + }) + .collect_vec(), + ), + ]; + + let memory_store = InMemory::new(); + for (i, path) in ordered_paths.iter().enumerate() { + memory_store + .put(path, Bytes::from(format!("{{\"val\": {i}}}")).into()) + .await + .unwrap(); + } + + for (buffer_size, jumbled_paths) in test_list { + // set up our ObjectStore to resolve paths in a jumbled order + let store = Arc::new(OrderedGetStore::new(memory_store.fork(), jumbled_paths)); + + // convert the paths to FileMeta + let ordered_file_meta: Vec<_> = ordered_paths + .iter() + .map(|path| { + let store = store.clone(); + async move { + let url = Url::parse(&format!("memory:/{}", path)).unwrap(); + let location = Path::from(path.as_ref()); + let meta = store.head(&location).await.unwrap(); + FileMeta { + location: url, + last_modified: meta.last_modified.timestamp_millis(), + size: meta.size, + } + } + }) + .collect(); + + // note: join_all is ordered + let files = future::join_all(ordered_file_meta).await; + + // fire off the read_json_files call (for all the files in order) + let handler = DefaultJsonHandler::new( + store, + Arc::new(TokioMultiThreadExecutor::new( + tokio::runtime::Handle::current(), + )), + ); + let handler = handler.with_buffer_size(*buffer_size); + let schema = Arc::new(ArrowSchema::new(vec![Arc::new(Field::new( + 
"val", + DataType::Int32, + true, + ))])); + let physical_schema = Arc::new(schema.try_into().unwrap()); + let data: Vec = handler + .read_json_files(&files, physical_schema, None) + .unwrap() + .map_ok(into_record_batch) + .try_collect() + .unwrap(); + + // check the order + let all_values: Vec = data + .iter() + .flat_map(|batch| { + let val_col: &Int32Array = batch.column(0).as_primitive(); + (0..val_col.len()).map(|i| val_col.value(i)).collect_vec() + }) + .collect(); + assert_eq!(all_values, (0..1000).collect_vec()); + } } } From b4ab4a911942582fb13778256847814ad454e625 Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 3 Mar 2025 19:44:33 -0800 Subject: [PATCH 22/38] release 0.8.0 (#726) release 0.8.0 --- CHANGELOG.md | 31 +++++++++++++++++++++++++++++++ Cargo.toml | 2 +- README.md | 4 ++-- ffi/Cargo.toml | 2 +- kernel/Cargo.toml | 4 +++- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49f89018e5..acd0edfc79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,36 @@ # Changelog +## [v0.8.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.8.0/) (2025-03-04) + +[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.7.0...v0.8.0) + +### 🏗️ Breaking changes + +1. ffi: `get_partition_column_count` and `get_partition_columns` now take a `Snapshot` instead of a + `Scan` ([#697]) +2. ffi: expression visitor callback `visit_literal_decimal` now takes `i64` for the upper half of a 128-bit int value ([#724]) +3. - `DefaultJsonHandler::with_readahead()` renamed to `DefaultJsonHandler::with_buffer_size()` ([#711]) +4. DefaultJsonHandler's defaults changed: + - default buffer size: 10 => 1000 requests/files + - default batch size: 1024 => 1000 rows +5. Bump MSRV to rustc 1.81 ([#725]) + +### 🐛 Bug Fixes + +1. Pin `chrono` version to fix arrow compilation failure ([#719]) + +### ⚡ Performance + +1. 
Replace default engine JSON reader's `FileStream` with concurrent futures ([#711]) + + +[#719]: https://github.com/delta-io/delta-kernel-rs/pull/719 +[#724]: https://github.com/delta-io/delta-kernel-rs/pull/724 +[#697]: https://github.com/delta-io/delta-kernel-rs/pull/697 +[#725]: https://github.com/delta-io/delta-kernel-rs/pull/725 +[#711]: https://github.com/delta-io/delta-kernel-rs/pull/711 + + ## [v0.7.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.7.0/) (2025-02-24) [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.1...v0.7.0) diff --git a/Cargo.toml b/Cargo.toml index aa75b3765e..5ecf9b465c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ license = "Apache-2.0" repository = "https://github.com/delta-io/delta-kernel-rs" readme = "README.md" rust-version = "1.81" -version = "0.7.0" +version = "0.8.0" [workspace.dependencies] object_store = { version = ">=0.11, <0.12" } diff --git a/README.md b/README.md index 1cddea1a8e..47cb6bc125 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default ```toml # fewer dependencies, requires consumer to implement Engine trait. 
# allows consumers to implement their own in-memory format -delta_kernel = "0.7.0" +delta_kernel = "0.8.0" # or turn on the default engine, based on arrow -delta_kernel = { version = "0.7.0", features = ["default-engine"] } +delta_kernel = { version = "0.8.0", features = ["default-engine"] } ``` ### Feature flags diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml index fe0aa97eb8..663c22d61d 100644 --- a/ffi/Cargo.toml +++ b/ffi/Cargo.toml @@ -25,7 +25,7 @@ delta_kernel = { path = "../kernel", default-features = false, features = [ "arrow", "developer-visibility", ] } -delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.7.0" } +delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.8.0" } [build-dependencies] cbindgen = "0.28" diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index b064e3a9ad..5bc607c2a2 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -30,6 +30,8 @@ pre-release-hook = [ "--unreleased", "--prepend", "../CHANGELOG.md", + "--include-path", + "*", "--tag", "{{version}}", ] @@ -51,7 +53,7 @@ uuid = "1.10.0" z85 = "3.0.5" # bring in our derive macros -delta_kernel_derive = { path = "../derive-macros", version = "0.7.0" } +delta_kernel_derive = { path = "../derive-macros", version = "0.8.0" } # used for developer-visibility visibility = "0.1.1" From 2f6c0490d0280c559e63c6360af6e23083c358d1 Mon Sep 17 00:00:00 2001 From: Sebastian Tia <75666019+sebastiantia@users.noreply.github.com> Date: Wed, 5 Mar 2025 14:56:20 -0800 Subject: [PATCH 23/38] feat: extract & insert sidecar batches in `replay`'s action iterator (#679) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes are proposed in this pull request? ### Summary This PR introduces foundational changes required for V2 checkpoint read support. The high-level changes required for v2 checkpoint support are: Item 1. Allow log segments to be built with V2 checkpoint files Item 2. 
Allow log segment `replay` functionality to retrieve actions from sidecar files if need be. This PR specifically adds support for Item 2. This PR **does not introduce full v2Checkpoints reader/writer support** as we are missing support for Item 1, meaning log segments can never have V2 checkpoint files in the first place. That functionality will be completed in [PR #685](https://github.com/delta-io/delta-kernel-rs/pull/685) which is stacked on top of this PR. However, the changes to log `replay` done here are compatible with tables using V1 checkpoints, allowing us to safely merge the changes here. ### Changes For each batch of `EngineData` from a checkpoint file: 1. Use the new `SidecarVisitor` to scan each batch for sidecar file paths embedded in sidecar actions. 2. If sidecar file paths exist: - Read the corresponding sidecar files. - Generate an iterator over batches of actions within the sidecar files. - Insert the sidecar batches that contain the add actions necessary to reconstruct the table’s state into the top level iterator **- Note: the original checkpoint batch is still included in the iterator** 3. If no sidecar file paths exist, move to the next batch & leave the original checkpoint batch in the iterator. Notes: - If the `checkpoint_read_schema` does not have file actions, we do not need to scan the batch with the `SidecarVisitor` and can leave the batch as-is in the top-level iterator. - Multi-part checkpoints do not have sidecar actions, so we do not need to scan the batch with the `SidecarVisitor` and can leave the batch as-is in the top-level iterator. - A batch may not include add actions, but other actions (like txn, metadata, protocol). This is safe to leave in the iterator as the non-file actions will be ignored. resolves https://github.com/delta-io/delta-kernel-rs/issues/670 ## How was this change tested? 
Although log segments can not yet have V2 checkpoints, we can easily mock batches that include sidecar actions that we can encounter in V2 checkpoints. - `test_sidecar_to_filemeta_valid_paths` - Tests handling of sidecar paths that can either be: - A relative path within the _delta_log/_sidecars directory, but it is just file-name - paths that are relative and have a parent (i.e. directory component) - An absolute path. **Unit tests for process_single_checkpoint_batch:** - `test_checkpoint_batch_with_no_sidecars_returns_none` - Verifies that if no sidecar actions are present, the checkpoint batch is returned unchanged. - `test_checkpoint_batch_with_sidecars_returns_sidecar_batches` - Ensures that when sidecars are present, the corresponding sidecar files are read, and their batches are returned. - `test_checkpoint_batch_with_sidecar_files_that_do_not_exist` - Tests behavior when sidecar files referenced in the checkpoint batch do not exist, ensuring an error is returned. - `test_reading_sidecar_files_with_predicate` - Tests that sidecar files that do not match the passed predicate are skipped correctly **Unit tests for create_checkpoint_stream:** - `test_create_checkpoint_stream_errors_when_schema_has_remove_but_no_sidecar_action` - Validates that if the schema includes the remove action, it must also contain the sidecar column. - `test_create_checkpoint_stream_errors_when_schema_has_add_but_no_sidecar_action` - Validates that if the schema includes the add action, it must also contain the sidecar column. - `test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_no_file_actions` - Checks that if the schema has no file actions, the checkpoint batches are returned unchanged - `test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_multi_part` - Ensures that for multi-part checkpoints, the batch is not visited, and checkpoint batches are returned as-is. 
- `test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars` - Tests reading a Parquet checkpoint batch and verifying it matches the expected result. - `test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars` - Verifies that JSON checkpoint batches are read correctly - `test_create_checkpoint_stream_reads_checkpoint_batch_with_sidecar` - Test ensuring that checkpoint files containing sidecar references return the additional corresponding sidecar batches correctly --- kernel/src/actions/mod.rs | 23 +- kernel/src/actions/visitors.rs | 5 +- kernel/src/log_segment.rs | 150 +++++++- kernel/src/log_segment/tests.rs | 646 ++++++++++++++++++++++++++++++-- kernel/src/scan/log_replay.rs | 7 +- kernel/src/scan/mod.rs | 50 ++- kernel/src/scan/state.rs | 3 +- kernel/src/schema/mod.rs | 5 + kernel/src/utils.rs | 21 +- 9 files changed, 859 insertions(+), 51 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index cb0a9c9cda..105571289f 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -16,7 +16,8 @@ use crate::table_features::{ }; use crate::table_properties::TableProperties; use crate::utils::require; -use crate::{DeltaResult, EngineData, Error, RowVisitor as _}; +use crate::{DeltaResult, EngineData, Error, FileMeta, RowVisitor as _}; +use url::Url; use visitors::{MetadataVisitor, ProtocolVisitor}; use delta_kernel_derive::Schema; @@ -524,7 +525,6 @@ pub struct SetTransaction { /// file actions. This action is only allowed in checkpoints following the V2 spec. 
/// /// [More info]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#sidecar-file-information -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Schema, Debug, PartialEq)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct Sidecar { @@ -547,6 +547,25 @@ pub(crate) struct Sidecar { pub tags: Option>, } +impl Sidecar { + /// Convert a Sidecar record to a FileMeta. + /// + /// This helper first builds the URL by joining the provided log_root with + /// the "_sidecars/" folder and the given sidecar path. + pub(crate) fn to_filemeta(&self, log_root: &Url) -> DeltaResult { + Ok(FileMeta { + location: log_root.join("_sidecars/")?.join(&self.path)?, + last_modified: self.modification_time, + size: self.size_in_bytes.try_into().map_err(|_| { + Error::generic(format!( + "Failed to convert sidecar size {} to usize", + self.size_in_bytes + )) + })?, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs index 72747ac6a1..36a2c7faf7 100644 --- a/kernel/src/actions/visitors.rs +++ b/kernel/src/actions/visitors.rs @@ -352,7 +352,7 @@ impl RowVisitor for CdcVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of an Add action + // Since path column is required, use it to detect presence of a Cdc action if let Some(path) = getters[0].get_opt(i, "cdc.path")? 
{ self.cdcs.push(Self::visit_cdc(i, path, getters)?); } @@ -438,7 +438,6 @@ impl RowVisitor for SetTransactionVisitor { } } -#[allow(unused)] //TODO: Remove once we implement V2 checkpoint file processing #[derive(Default)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct SidecarVisitor { @@ -475,7 +474,7 @@ impl RowVisitor for SidecarVisitor { )) ); for i in 0..row_count { - // Since path column is required, use it to detect presence of a sidecar action + // Since path column is required, use it to detect presence of a Sidecar action if let Some(path) = getters[0].get_opt(i, "sidecar.path")? { self.sidecars.push(Self::visit_sidecar(i, path, getters)?); } diff --git a/kernel/src/log_segment.rs b/kernel/src/log_segment.rs index b4f255c576..e765ca743b 100644 --- a/kernel/src/log_segment.rs +++ b/kernel/src/log_segment.rs @@ -1,13 +1,18 @@ //! Represents a segment of a delta log. [`LogSegment`] wraps a set of checkpoint and commit //! files. -use crate::actions::{get_log_schema, Metadata, Protocol, METADATA_NAME, PROTOCOL_NAME}; +use crate::actions::visitors::SidecarVisitor; +use crate::actions::{ + get_log_schema, Metadata, Protocol, ADD_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; use crate::path::{LogPathFileType, ParsedLogPath}; use crate::schema::SchemaRef; use crate::snapshot::CheckpointMetadata; use crate::utils::require; use crate::{ - DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, FileSystemClient, Version, + DeltaResult, Engine, EngineData, Error, Expression, ExpressionRef, FileSystemClient, + ParquetHandler, RowVisitor, Version, }; use itertools::Itertools; use std::collections::HashMap; @@ -213,17 +218,146 @@ impl LogSegment { .read_json_files(&commit_files, commit_read_schema, meta_predicate.clone())? 
.map_ok(|batch| (batch, true)); - let checkpoint_parts: Vec<_> = self + let checkpoint_stream = + self.create_checkpoint_stream(engine, checkpoint_read_schema, meta_predicate)?; + + Ok(commit_stream.chain(checkpoint_stream)) + } + + /// Returns an iterator over checkpoint data, processing sidecar files when necessary. + /// + /// By default, `create_checkpoint_stream` checks for the presence of sidecar files, and + /// reads their contents if present. Checking for sidecar files is skipped if: + /// - The checkpoint is a multi-part checkpoint + /// - The checkpoint read schema does not contain a file action + /// + /// For single-part checkpoints, any referenced sidecar files are processed. These + /// sidecar files contain the actual file actions that would otherwise be + /// stored directly in the checkpoint. The sidecar file batches are chained to the + /// checkpoint batch in the top level iterator to be returned. + fn create_checkpoint_stream( + &self, + engine: &dyn Engine, + checkpoint_read_schema: SchemaRef, + meta_predicate: Option, + ) -> DeltaResult, bool)>> + Send> { + let need_file_actions = checkpoint_read_schema.contains(ADD_NAME) + || checkpoint_read_schema.contains(REMOVE_NAME); + require!( + !need_file_actions || checkpoint_read_schema.contains(SIDECAR_NAME), + Error::invalid_checkpoint( + "If the checkpoint read schema contains file actions, it must contain the sidecar column" + ) + ); + + let checkpoint_file_meta: Vec<_> = self .checkpoint_parts .iter() .map(|f| f.location.clone()) .collect(); - let checkpoint_stream = engine - .get_parquet_handler() - .read_parquet_files(&checkpoint_parts, checkpoint_read_schema, meta_predicate)? - .map_ok(|batch| (batch, false)); - Ok(commit_stream.chain(checkpoint_stream)) + let parquet_handler = engine.get_parquet_handler(); + + // Historically, we had a shared file reader trait for JSON and Parquet handlers, + // but it was removed to avoid unnecessary coupling. 
This is a concrete case + // where it *could* have been useful, but for now, we're keeping them separate. + // If similar patterns start appearing elsewhere, we should reconsider that decision. + let actions = match self.checkpoint_parts.first() { + Some(parsed_log_path) if parsed_log_path.extension == "json" => { + engine.get_json_handler().read_json_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? + } + Some(parsed_log_path) if parsed_log_path.extension == "parquet" => parquet_handler + .read_parquet_files( + &checkpoint_file_meta, + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )?, + Some(parsed_log_path) => { + return Err(Error::generic(format!( + "Unsupported checkpoint file type: {}", + parsed_log_path.extension, + ))); + } + // This is the case when there are no checkpoints in the log segment + // so we return an empty iterator + None => Box::new(std::iter::empty()), + }; + + let log_root = self.log_root.clone(); + + let actions_iter = actions + .map(move |checkpoint_batch_result| -> DeltaResult<_> { + let checkpoint_batch = checkpoint_batch_result?; + // This closure maps the checkpoint batch to an iterator of batches + // by chaining the checkpoint batch with sidecar batches if they exist. + + // 1. In the case where the schema does not contain file actions, we return the + // checkpoint batch directly as sidecar files only have to be read when the + // schema contains add/remove action. + // 2. Multi-part checkpoint batches never have sidecar actions, so the batch is + // returned as-is. + let sidecar_content = if need_file_actions && checkpoint_file_meta.len() == 1 { + Self::process_sidecars( + parquet_handler.clone(), // cheap Arc clone + log_root.clone(), + checkpoint_batch.as_ref(), + checkpoint_read_schema.clone(), + meta_predicate.clone(), + )? 
+ } else { + None + }; + + let combined_batches = std::iter::once(Ok(checkpoint_batch)) + .chain(sidecar_content.into_iter().flatten()) + // The boolean flag indicates whether the batch originated from a commit file + // (true) or a checkpoint file (false). + .map_ok(|sidecar_batch| (sidecar_batch, false)); + + Ok(combined_batches) + }) + .flatten_ok() + .map(|result| result?); // result-result to result + + Ok(actions_iter) + } + + /// Processes sidecar files for the given checkpoint batch. + /// + /// This function extracts any sidecar file references from the provided batch. + /// Each sidecar file is read and an iterator of file action batches is returned + fn process_sidecars( + parquet_handler: Arc, + log_root: Url, + batch: &dyn EngineData, + checkpoint_read_schema: SchemaRef, + meta_predicate: Option, + ) -> DeltaResult>> + Send>> { + // Visit the rows of the checkpoint batch to extract sidecar file references + let mut visitor = SidecarVisitor::default(); + visitor.visit_rows_of(batch)?; + + // If there are no sidecar files, return early + if visitor.sidecars.is_empty() { + return Ok(None); + } + + let sidecar_files: Vec<_> = visitor + .sidecars + .iter() + .map(|sidecar| sidecar.to_filemeta(&log_root)) + .try_collect()?; + + // Read the sidecar files and return an iterator of sidecar file batches + Ok(Some(parquet_handler.read_parquet_files( + &sidecar_files, + checkpoint_read_schema, + meta_predicate, + )?)) } // Get the most up-to-date Protocol and Metadata actions diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 0ab9ee6c0c..158015ef7b 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -1,15 +1,33 @@ +use std::sync::LazyLock; use std::{path::PathBuf, sync::Arc}; +use futures::executor::block_on; use itertools::Itertools; use object_store::{memory::InMemory, path::Path, ObjectStore}; use url::Url; +use crate::actions::visitors::AddVisitor; +use crate::actions::{ + get_log_add_schema, 
get_log_schema, Add, Sidecar, ADD_NAME, METADATA_NAME, REMOVE_NAME, + SIDECAR_NAME, +}; +use crate::engine::arrow_data::ArrowEngineData; use crate::engine::default::executor::tokio::TokioBackgroundExecutor; use crate::engine::default::filesystem::ObjectStoreFileSystemClient; +use crate::engine::default::DefaultEngine; use crate::engine::sync::SyncEngine; use crate::log_segment::LogSegment; +use crate::parquet::arrow::ArrowWriter; +use crate::path::ParsedLogPath; +use crate::scan::test_utils::{ + add_batch_simple, add_batch_with_remove, sidecar_batch_with_given_paths, +}; use crate::snapshot::CheckpointMetadata; -use crate::{FileSystemClient, Table}; +use crate::utils::test_utils::{assert_batch_matches, Action}; +use crate::{ + DeltaResult, Engine, EngineData, Expression, ExpressionRef, FileMeta, FileSystemClient, + RowVisitor, Table, +}; use test_utils::delta_path_for_version; // NOTE: In addition to testing the meta-predicate for metadata replay, this test also verifies @@ -73,27 +91,25 @@ fn build_log_with_paths_and_checkpoint( let data = bytes::Bytes::from("kernel-data"); // add log files to store - tokio::runtime::Runtime::new() - .expect("create tokio runtime") - .block_on(async { - for path in paths { - store - .put(path, data.clone().into()) - .await - .expect("put log file in store"); - } - if let Some(checkpoint_metadata) = checkpoint_metadata { - let checkpoint_str = - serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); - store - .put( - &Path::from("_delta_log/_last_checkpoint"), - checkpoint_str.into(), - ) - .await - .expect("Write _last_checkpoint"); - } - }); + block_on(async { + for path in paths { + store + .put(path, data.clone().into()) + .await + .expect("put log file in store"); + } + if let Some(checkpoint_metadata) = checkpoint_metadata { + let checkpoint_str = + serde_json::to_string(checkpoint_metadata).expect("Serialize checkpoint"); + store + .put( + &Path::from("_delta_log/_last_checkpoint"), + 
checkpoint_str.into(), + ) + .await + .expect("Write _last_checkpoint"); + } + }); let client = ObjectStoreFileSystemClient::new( store, @@ -106,6 +122,93 @@ fn build_log_with_paths_and_checkpoint( (Box::new(client), log_root) } +// Create an in-memory store and return the store and the URL for the store's _delta_log directory. +fn new_in_memory_store() -> (Arc, Url) { + ( + Arc::new(InMemory::new()), + Url::parse("memory:///") + .unwrap() + .join("_delta_log/") + .unwrap(), + ) +} + +// Writes a record batch obtained from engine data to the in-memory store at a given path. +fn write_parquet_to_store( + store: &Arc, + path: String, + data: Box, +) -> DeltaResult<()> { + let batch = ArrowEngineData::try_from_engine_data(data)?; + let record_batch = batch.record_batch(); + + let mut buffer = vec![]; + let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), None)?; + writer.write(record_batch)?; + writer.close()?; + + block_on(async { store.put(&Path::from(path), buffer.into()).await })?; + + Ok(()) +} + +/// Writes all actions to a _delta_log parquet checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. +fn add_checkpoint_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log/_sidecars file in the store. +/// This function formats the provided filename into the _sidecars subdirectory. +fn add_sidecar_to_store( + store: &Arc, + data: Box, + filename: &str, +) -> DeltaResult<()> { + let path = format!("_delta_log/_sidecars/{}", filename); + write_parquet_to_store(store, path, data) +} + +/// Writes all actions to a _delta_log json checkpoint file in the store. +/// This function formats the provided filename into the _delta_log directory. 
+fn write_json_to_store( + store: &Arc, + actions: Vec, + filename: &str, +) -> DeltaResult<()> { + let json_lines: Vec = actions + .into_iter() + .map(|action| serde_json::to_string(&action).expect("action to string")) + .collect(); + let content = json_lines.join("\n"); + let checkpoint_path = format!("_delta_log/{}", filename); + + tokio::runtime::Runtime::new() + .expect("create tokio runtime") + .block_on(async { + store + .put(&Path::from(checkpoint_path), content.into()) + .await + })?; + + Ok(()) +} + +fn create_log_path(path: &str) -> ParsedLogPath { + ParsedLogPath::try_from(FileMeta { + location: Url::parse(path).expect("Invalid file URL"), + last_modified: 0, + size: 0, + }) + .unwrap() + .unwrap() +} + #[test] fn build_snapshot_with_unsupported_uuid_checkpoint() { let (client, log_root) = build_log_with_paths_and_checkpoint( @@ -122,7 +225,6 @@ fn build_snapshot_with_unsupported_uuid_checkpoint() { ], None, ); - let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; @@ -619,3 +721,501 @@ fn table_changes_fails_with_larger_start_version_than_end() { let log_segment_res = LogSegment::for_table_changes(client.as_ref(), log_root, 1, Some(0)); assert!(log_segment_res.is_err()); } +#[test] +fn test_sidecar_to_filemeta_valid_paths() -> DeltaResult<()> { + let log_root = Url::parse("file:///var/_delta_log/")?; + let test_cases = [ + ( + "example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "file:///var/_delta_log/_sidecars/example.parquet", + "file:///var/_delta_log/_sidecars/example.parquet", + ), + ( + "test/test/example.parquet", + "file:///var/_delta_log/_sidecars/test/test/example.parquet", + ), + ]; + + for (input_path, expected_url) in test_cases.into_iter() { + let sidecar = Sidecar { + path: expected_url.to_string(), + modification_time: 0, + size_in_bytes: 1000, + tags: None, + 
}; + + let filemeta = sidecar.to_filemeta(&log_root)?; + assert_eq!( + filemeta.location.as_str(), + expected_url, + "Mismatch for input path: {}", + input_path + ); + } + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_no_sidecars_returns_none() -> DeltaResult<()> { + let (_, log_root) = new_in_memory_store(); + let engine = Arc::new(SyncEngine::new()); + let checkpoint_batch = add_batch_simple(get_log_schema().clone()); + + let mut iter = LogSegment::process_sidecars( + engine.get_parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? + .into_iter() + .flatten(); + + // Assert no batches are returned + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecars_returns_sidecar_batches() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(read_schema.clone()), + "sidecarfile2.parquet", + )?; + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + read_schema.clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.get_parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + None, + )? 
+ .into_iter() + .flatten(); + + // Assert the correctness of batches returned + assert_batch_matches(iter.next().unwrap()?, add_batch_simple(read_schema.clone())); + assert_batch_matches(iter.next().unwrap()?, add_batch_with_remove(read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_checkpoint_batch_with_sidecar_files_that_do_not_exist() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + + let checkpoint_batch = sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ); + + let mut iter = LogSegment::process_sidecars( + engine.get_parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?, + None, + )? + .into_iter() + .flatten(); + + // Assert that an error is returned when trying to read sidecar files that do not exist + let err = iter.next().unwrap(); + assert!(err.is_err()); + + Ok(()) +} + +#[test] +fn test_reading_sidecar_files_with_predicate() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; + + let checkpoint_batch = + sidecar_batch_with_given_paths(vec!["sidecarfile1.parquet"], read_schema.clone()); + + // Add a sidecar file with only add actions + add_sidecar_to_store( + &store, + add_batch_simple(read_schema.clone()), + "sidecarfile1.parquet", + )?; + + // Filter out sidecar files that do not contain remove actions + let remove_predicate: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + + let mut iter = LogSegment::process_sidecars( + 
engine.get_parquet_handler(), + log_root, + checkpoint_batch.as_ref(), + read_schema.clone(), + remove_predicate.clone(), + )? + .into_iter() + .flatten(); + + // As the sidecar batch contains only add actions, the batch should be filtered out + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_remove_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream( + &engine, + get_log_schema().project(&[REMOVE_NAME])?, + None, + ); + + // Errors because the schema has an REMOVE action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_errors_when_schema_has_add_but_no_sidecar_action( +) -> DeltaResult<()> { + let engine = SyncEngine::new(); + let log_root = Url::parse("s3://example-bucket/logs/")?; + + // Create the stream over checkpoint batches. + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path("file:///00000000000000000001.parquet")], + log_root, + None, + )?; + let result = log_segment.create_checkpoint_stream(&engine, get_log_add_schema().clone(), None); + + // Errors because the schema has an ADD action but no SIDECAR action. + assert!(result.is_err()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_no_file_actions( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + add_checkpoint_to_store( + &store, + // Create a checkpoint batch with sidecar actions to verify that the sidecar actions are not read. 
+ sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? + .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[METADATA_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], v2_checkpoint_read_schema), + ); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_multi_part( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + + // Multi-part checkpoints should never contain sidecar actions. + // This test intentionally includes batches with sidecar actions in multi-part checkpoints + // to verify that the reader does not process them. Instead, the reader should short-circuit + // and return the checkpoint batches as-is when encountering a multi-part checkpoint. + // Note: This is a test-only scenario; real tables should never have multi-part + // checkpoints with sidecar actions. 
+ let checkpoint_part_1 = "00000000000000000001.checkpoint.0000000001.0000000002.parquet"; + let checkpoint_part_2 = "00000000000000000001.checkpoint.0000000002.0000000002.parquet"; + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar1.parquet"], get_log_schema().clone()), + checkpoint_part_1, + )?; + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths(vec!["sidecar2.parquet"], get_log_schema().clone()), + checkpoint_part_2, + )?; + + let checkpoint_one_file = log_root.join(checkpoint_part_1)?.to_string(); + let checkpoint_two_file = log_root.join(checkpoint_part_2)?.to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![ + create_log_path(&checkpoint_one_file), + create_log_path(&checkpoint_two_file), + ], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert the correctness of batches returned + for expected_sidecar in ["sidecar1.parquet", "sidecar2.parquet"].iter() { + let (batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + batch, + sidecar_batch_with_given_paths( + vec![expected_sidecar], + v2_checkpoint_read_schema.clone(), + ), + ); + } + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars() -> DeltaResult<()> +{ + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + + add_checkpoint_to_store( + &store, + add_batch_simple(get_log_schema().clone()), + "00000000000000000001.checkpoint.parquet", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches(first_batch, add_batch_simple(v2_checkpoint_read_schema)); + assert!(iter.next().is_none()); + + Ok(()) +} + +#[test] +fn test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars() -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + + write_json_to_store( + &store, + vec![Action::Add(Add { + path: "fake_path_1".into(), + data_change: true, + ..Default::default() + })], + "00000000000000000001.checkpoint.json", + )?; + + let checkpoint_one_file = log_root + .join("00000000000000000001.checkpoint.json")? + .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_one_file)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema, None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + let mut visitor = AddVisitor::default(); + visitor.visit_rows_of(&*first_batch)?; + assert!(visitor.adds.len() == 1); + assert!(visitor.adds[0].path == "fake_path_1"); + + assert!(iter.next().is_none()); + + Ok(()) +} + +// Tests the end-to-end process of creating a checkpoint stream. 
+// Verifies that: +// - The checkpoint file is read and produces batches containing references to sidecar files. +// - As sidecar references are present, the corresponding sidecar files are processed correctly. +// - Batches from both the checkpoint file and sidecar files are returned. +// - Each returned batch is correctly flagged with is_log_batch set to false +#[test] +fn test_create_checkpoint_stream_reads_checkpoint_file_and_returns_sidecar_batches( +) -> DeltaResult<()> { + let (store, log_root) = new_in_memory_store(); + let engine = DefaultEngine::new( + store.clone(), + Path::from("/"), + Arc::new(TokioBackgroundExecutor::new()), + ); + + add_checkpoint_to_store( + &store, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().clone(), + ), + "00000000000000000001.checkpoint.parquet", + )?; + + add_sidecar_to_store( + &store, + add_batch_simple(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile1.parquet", + )?; + add_sidecar_to_store( + &store, + add_batch_with_remove(get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?), + "sidecarfile2.parquet", + )?; + + let checkpoint_file_path = log_root + .join("00000000000000000001.checkpoint.parquet")? 
+ .to_string(); + + let v2_checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; + + let log_segment = LogSegment::try_new( + vec![], + vec![create_log_path(&checkpoint_file_path)], + log_root, + None, + )?; + let mut iter = + log_segment.create_checkpoint_stream(&engine, v2_checkpoint_read_schema.clone(), None)?; + + // Assert that the first batch returned is from reading checkpoint file 1 + let (first_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + first_batch, + sidecar_batch_with_given_paths( + vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], + get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?, + ), + ); + // Assert that the second batch returned is from reading sidecarfile1 + let (second_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + second_batch, + add_batch_simple(v2_checkpoint_read_schema.clone()), + ); + + // Assert that the second batch returned is from reading sidecarfile2 + let (third_batch, is_log_batch) = iter.next().unwrap()?; + assert!(!is_log_batch); + assert_batch_matches( + third_batch, + add_batch_with_remove(v2_checkpoint_read_schema), + ); + + assert!(iter.next().is_none()); + + Ok(()) +} diff --git a/kernel/src/scan/log_replay.rs b/kernel/src/scan/log_replay.rs index 177996a800..cebce5b6c4 100644 --- a/kernel/src/scan/log_replay.rs +++ b/kernel/src/scan/log_replay.rs @@ -323,6 +323,7 @@ pub(crate) fn scan_action_iter( mod tests { use std::{collections::HashMap, sync::Arc}; + use crate::actions::get_log_schema; use crate::expressions::{column_name, Scalar}; use crate::scan::state::{DvInfo, Stats}; use crate::scan::test_utils::{ @@ -364,7 +365,7 @@ mod tests { #[test] fn test_scan_action_iter() { run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], @@ -376,7 +377,7 @@ mod tests { 
#[test] fn test_scan_action_iter_with_remove() { run_with_validate_callback( - vec![add_batch_with_remove()], + vec![add_batch_with_remove(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[false, false, true, false], @@ -387,7 +388,7 @@ mod tests { #[test] fn test_no_transforms() { - let batch = vec![add_batch_simple()]; + let batch = vec![add_batch_simple(get_log_schema().clone())]; let logical_schema = Arc::new(crate::schema::StructType::new(vec![])); let iter = scan_action_iter( &SyncEngine::new(), diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 0672345eb5..13a15ffb7e 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -11,7 +11,7 @@ use url::Url; use crate::actions::deletion_vector::{ deletion_treemap_to_bools, split_vector, DeletionVectorDescriptor, }; -use crate::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use crate::actions::{get_log_schema, ADD_NAME, REMOVE_NAME, SIDECAR_NAME}; use crate::expressions::{ColumnName, Expression, ExpressionRef, ExpressionTransform, Scalar}; use crate::predicates::{DefaultPredicateEvaluator, EmptyColumnResolver}; use crate::scan::state::{DvInfo, Stats}; @@ -428,7 +428,7 @@ impl Scan { engine: &dyn Engine, ) -> DeltaResult, bool)>> + Send> { let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); + let checkpoint_read_schema = get_log_schema().project(&[ADD_NAME, SIDECAR_NAME])?; // NOTE: We don't pass any meta-predicate because we expect no meaningful row group skipping // when ~every checkpoint file will contain the adds and removes we are looking for. 
@@ -663,10 +663,10 @@ pub fn selection_vector( // some utils that are used in file_stream.rs and state.rs tests #[cfg(test)] pub(crate) mod test_utils { - use std::sync::Arc; - use crate::arrow::array::{RecordBatch, StringArray}; use crate::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; + use itertools::Itertools; + use std::sync::Arc; use crate::{ actions::get_log_schema, @@ -690,23 +690,54 @@ pub(crate) mod test_utils { Box::new(ArrowEngineData::new(batch)) } - // simple add - pub(crate) fn add_batch_simple() -> Box { + // Generates a batch of sidecar actions with the given paths. + // The schema is provided as null columns affect equality checks. + pub(crate) fn sidecar_batch_with_given_paths( + paths: Vec<&str>, + output_schema: SchemaRef, + ) -> Box { + let handler = SyncJsonHandler {}; + + let mut json_strings: Vec = paths + .iter() + .map(|path| { + format!( + r#"{{"sidecar":{{"path":"{path}","sizeInBytes":9268,"modificationTime":1714496113961,"tags":{{"tag_foo":"tag_bar"}}}}}}"# + ) + }) + .collect(); + json_strings.push(r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#.to_string()); + + let json_strings_array: StringArray = + json_strings.iter().map(|s| s.as_str()).collect_vec().into(); + + let parsed = handler + .parse_json( + string_array_to_engine_data(json_strings_array), + output_schema, + ) + .unwrap(); + + ArrowEngineData::try_from_engine_data(parsed).unwrap() + } + + // Generates a batch with an add action. + // The schema is provided as null columns affect equality checks. 
+ pub(crate) fn add_batch_simple(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"add":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c000.snappy.parquet","partitionValues": {"date": "2017-12-10"},"size":635,"modificationTime":1677811178336,"dataChange":true,"stats":"{\"numRecords\":10,\"minValues\":{\"value\":0},\"maxValues\":{\"value\":9},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"},"deletionVector":{"storageType":"u","pathOrInlineDv":"vBn[lx{q8@P<9BNH/isA","offset":1,"sizeInBytes":36,"cardinality":2}}}"#, r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); ArrowEngineData::try_from_engine_data(parsed).unwrap() } - // add batch with a removed file - pub(crate) fn add_batch_with_remove() -> Box { + // An add batch with a removed file parsed with the schema provided + pub(crate) fn add_batch_with_remove(output_schema: SchemaRef) -> Box { let handler = SyncJsonHandler {}; let json_strings: StringArray = vec![ r#"{"remove":{"path":"part-00000-fae5310a-a37d-4e51-827b-c3d5516560ca-c001.snappy.parquet","deletionTimestamp":1677811194426,"dataChange":true,"extendedFileMetadata":true,"partitionValues":{},"size":635,"tags":{"INSERTION_TIME":"1677811178336000","MIN_INSERTION_TIME":"1677811178336000","MAX_INSERTION_TIME":"1677811178336000","OPTIMIZE_TARGET_SIZE":"268435456"}}}"#, @@ -715,7 
+746,6 @@ pub(crate) mod test_utils { r#"{"metaData":{"id":"testId","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"value\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"configuration":{"delta.enableDeletionVectors":"true","delta.columnMapping.mode":"none"},"createdTime":1677811175819}}"#, ] .into(); - let output_schema = get_log_schema().clone(); let parsed = handler .parse_json(string_array_to_engine_data(json_strings), output_schema) .unwrap(); diff --git a/kernel/src/scan/state.rs b/kernel/src/scan/state.rs index 85eb6e4a79..0dfecc4eee 100644 --- a/kernel/src/scan/state.rs +++ b/kernel/src/scan/state.rs @@ -243,6 +243,7 @@ impl RowVisitor for ScanFileVisitor<'_, T> { mod tests { use std::collections::HashMap; + use crate::actions::get_log_schema; use crate::scan::test_utils::{add_batch_simple, run_with_validate_callback}; use crate::ExpressionRef; @@ -282,7 +283,7 @@ mod tests { fn test_simple_visit_scan_data() { let context = TestContext { id: 2 }; run_with_validate_callback( - vec![add_batch_simple()], + vec![add_batch_simple(get_log_schema().clone())], None, // not testing schema None, // not testing transform &[true, false], diff --git a/kernel/src/schema/mod.rs b/kernel/src/schema/mod.rs index 6086a70313..11e81817cc 100644 --- a/kernel/src/schema/mod.rs +++ b/kernel/src/schema/mod.rs @@ -286,6 +286,11 @@ impl StructType { self.fields.values() } + // Checks if the `StructType` contains a field with the specified name. + pub(crate) fn contains(&self, name: impl AsRef) -> bool { + self.fields.contains_key(name.as_ref()) + } + /// Extracts the name and type of all leaf columns, in schema order. Caller should pass Some /// `own_name` if this schema is embedded in a larger struct (e.g. `add.*`) and None if the /// schema is a top-level result (e.g. `*`). 
diff --git a/kernel/src/utils.rs b/kernel/src/utils.rs index 8f4fcf8189..fd2db25013 100644 --- a/kernel/src/utils.rs +++ b/kernel/src/utils.rs @@ -13,6 +13,7 @@ pub(crate) use require; #[cfg(test)] pub(crate) mod test_utils { + use crate::arrow::array::RecordBatch; use itertools::Itertools; use object_store::local::LocalFileSystem; use object_store::ObjectStore; @@ -21,7 +22,11 @@ pub(crate) mod test_utils { use tempfile::TempDir; use test_utils::delta_path_for_version; - use crate::actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}; + use crate::{ + actions::{Add, Cdc, CommitInfo, Metadata, Protocol, Remove}, + engine::arrow_data::ArrowEngineData, + EngineData, + }; #[derive(Serialize)] pub(crate) enum Action { @@ -73,9 +78,23 @@ pub(crate) mod test_utils { .await .expect("put log file in store"); } + /// Get the path to the root of the table. pub(crate) fn table_root(&self) -> &Path { self.dir.path() } } + + /// Try to convert an `EngineData` into a `RecordBatch`. Panics if not using `ArrowEngineData` from + /// the default module + fn into_record_batch(engine_data: Box) -> RecordBatch { + ArrowEngineData::try_from_engine_data(engine_data) + .unwrap() + .into() + } + + /// Checks that two `EngineData` objects are equal by converting them to `RecordBatch` and comparing + pub(crate) fn assert_batch_matches(actual: Box, expected: Box) { + assert_eq!(into_record_batch(actual), into_record_batch(expected)); + } } From 1b7fb11dcbe8d53877fae1d9902ca2021e658b03 Mon Sep 17 00:00:00 2001 From: Sebastian Tia <75666019+sebastiantia@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:11:23 -0800 Subject: [PATCH 24/38] feat: support the `v2Checkpoint` reader/writer feature (#685) ## What changes are proposed in this pull request? ### Summary This PR introduces foundational changes required for V2 checkpoint read support. The high-level changes required for v2 checkpoint support are: Item 1. Allow log segments to be built with V2 checkpoint files Item 2. 
Allow log segment replay functionality to retrieve actions from sidecar files if need be. This PR specifically adds support for Item 1. This PR enables support for the `v2Checkpoints` reader/writer table feature for delta kernel rust by 1. Allowing snapshots to now leverage UUID-named checkpoints as part of their log segment. 2. Adding the `v2Checkpoints` feature to the list of supported reader features. - This PR is stacked on Item 2 [here](https://github.com/delta-io/delta-kernel-rs/pull/679). Golden table tests are included in this PR. - More integration tests will be introduced in a follow-up PR tracked here: https://github.com/delta-io/delta-kernel-rs/issues/671 - This PR stacks changes on top of https://github.com/delta-io/delta-kernel-rs/pull/679. For the correct file diff view, [please only review these commits](https://github.com/delta-io/delta-kernel-rs/pull/685/files/501c675736dd102a691bc2132c6e81579cf4a1a6..3dcd0859be048dc05f3e98223d0950e460633b60) resolves https://github.com/delta-io/delta-kernel-rs/issues/688 ### Changes We already have the capability to recognize UUID-named checkpoint files with the variant `LogPathFileType::UuidCheckpoint(uuid)`. This PR does the following: - Adds `LogPathFileType::UuidCheckpoint(_)` to the list of valid checkpoint file types that are collected during log listing - This addition allows V2 checkpoints to be included in log segments. - Adds `ReaderFeatures::V2Checkpoint` to the list of supported reader features - This addition allows protocol & metadata validation to pass for tables with the `v2Checkpoints` reader feature - Adds the `UnsupportedFeature` reader/writer feature for testing purposes. ## How was this change tested? 
Test coverage for the changes required to support building log segments with V2 checkpoints: - `test_uuid_checkpoint_patterns` (already exists, small update) - Verifies the behavior of parsing log file paths that follow the UUID-naming scheme - `test_v2_checkpoint_supported` - Tests the `ensure_read_supported()` func appropriately validates protocol with `ReaderFeatures::V2Checkpoint` - `build_snapshot_with_uuid_checkpoint_json` - `build_snapshot_with_uuid_checkpoint_parquet` (already exists) - `build_snapshot_with_correct_last_uuid_checkpoint` Golden table tests: - `v2-checkpoint-json` - `v2-checkpoint-parquet` Potential todos: - is it worth introducing a preference for V2 checkpoints vs V1 checkpoints if both are present in the log for a version - what about a preference for checkpoints referenced by _last_checkpoint? --- kernel/src/actions/mod.rs | 8 ++-- kernel/src/log_segment/tests.rs | 76 +++++++++++++++++++++++++++++-- kernel/src/path.rs | 21 +++------ kernel/src/table_configuration.rs | 10 +--- kernel/src/table_features/mod.rs | 2 +- kernel/tests/golden_tables.rs | 5 +- 6 files changed, 88 insertions(+), 34 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 105571289f..ccc96f1eb2 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -811,7 +811,7 @@ mod tests { } #[test] - fn test_v2_checkpoint_unsupported() { + fn test_v2_checkpoint_supported() { let protocol = Protocol::try_new( 3, 7, @@ -819,7 +819,7 @@ mod tests { Some([ReaderFeatures::V2Checkpoint]), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 4, @@ -849,7 +849,7 @@ mod tests { Some(&empty_features), ) .unwrap(); - assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol::try_new( 3, @@ -867,7 +867,7 @@ mod tests { Some([WriterFeatures::V2Checkpoint]), ) .unwrap(); - 
assert!(protocol.ensure_read_supported().is_err()); + assert!(protocol.ensure_read_supported().is_ok()); let protocol = Protocol { min_reader_version: 1, diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 158015ef7b..4fdbd9feee 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -210,7 +210,7 @@ fn create_log_path(path: &str) -> ParsedLogPath { } #[test] -fn build_snapshot_with_unsupported_uuid_checkpoint() { +fn build_snapshot_with_uuid_checkpoint_parquet() { let (client, log_root) = build_log_with_paths_and_checkpoint( &[ delta_path_for_version(0, "json"), @@ -225,18 +225,88 @@ fn build_snapshot_with_unsupported_uuid_checkpoint() { ], None, ); + let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); let commit_files = log_segment.ascending_commit_files; let checkpoint_parts = log_segment.checkpoint_parts; assert_eq!(checkpoint_parts.len(), 1); - assert_eq!(checkpoint_parts[0].version, 3); + assert_eq!(checkpoint_parts[0].version, 5); let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); - let expected_versions = vec![4, 5, 6, 7]; + let expected_versions = vec![6, 7]; + assert_eq!(versions, expected_versions); +} + +#[test] +fn build_snapshot_with_uuid_checkpoint_json() { + let (client, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json"), + delta_path_for_version(6, "json"), + delta_path_for_version(7, "json"), + ], + None, + ); + + let log_segment = LogSegment::for_snapshot(client.as_ref(), log_root, None, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = 
log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(checkpoint_parts[0].version, 5); + + let versions = commit_files.into_iter().map(|x| x.version).collect_vec(); + let expected_versions = vec![6, 7]; assert_eq!(versions, expected_versions); } +#[test] +fn build_snapshot_with_correct_last_uuid_checkpoint() { + let checkpoint_metadata = CheckpointMetadata { + version: 5, + size: 10, + parts: Some(1), + size_in_bytes: None, + num_of_add_files: None, + checkpoint_schema: None, + checksum: None, + }; + + let (client, log_root) = build_log_with_paths_and_checkpoint( + &[ + delta_path_for_version(0, "json"), + delta_path_for_version(1, "checkpoint.parquet"), + delta_path_for_version(1, "json"), + delta_path_for_version(2, "json"), + delta_path_for_version(3, "checkpoint.parquet"), + delta_path_for_version(3, "json"), + delta_path_for_version(4, "json"), + delta_path_for_version(5, "checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.parquet"), + delta_path_for_version(5, "json"), + delta_path_for_version(6, "json"), + delta_path_for_version(7, "json"), + ], + Some(&checkpoint_metadata), + ); + + let log_segment = + LogSegment::for_snapshot(client.as_ref(), log_root, checkpoint_metadata, None).unwrap(); + let commit_files = log_segment.ascending_commit_files; + let checkpoint_parts = log_segment.checkpoint_parts; + + assert_eq!(checkpoint_parts.len(), 1); + assert_eq!(commit_files.len(), 2); + assert_eq!(checkpoint_parts[0].version, 5); + assert_eq!(commit_files[0].version, 6); + assert_eq!(commit_files[1].version, 7); +} #[test] fn build_snapshot_with_multiple_incomplete_multipart_checkpoints() { let (client, log_root) = build_log_with_paths_and_checkpoint( diff --git a/kernel/src/path.rs b/kernel/src/path.rs index 23e7819de5..df372f08ec 100644 --- a/kernel/src/path.rs +++ b/kernel/src/path.rs @@ -163,10 +163,11 @@ impl ParsedLogPath { #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[cfg_attr(not(feature = 
"developer-visibility"), visibility::make(pub(crate)))] fn is_checkpoint(&self) -> bool { - // TODO: Include UuidCheckpoint once we actually support v2 checkpoints matches!( self.file_type, - LogPathFileType::SinglePartCheckpoint | LogPathFileType::MultiPartCheckpoint { .. } + LogPathFileType::SinglePartCheckpoint + | LogPathFileType::MultiPartCheckpoint { .. } + | LogPathFileType::UuidCheckpoint(_) ) } @@ -174,11 +175,7 @@ impl ParsedLogPath { #[cfg_attr(not(feature = "developer-visibility"), visibility::make(pub(crate)))] #[allow(dead_code)] // currently only used in tests, which don't "count" fn is_unknown(&self) -> bool { - // TODO: Stop treating UuidCheckpoint as unknown once we support v2 checkpoints - matches!( - self.file_type, - LogPathFileType::Unknown | LogPathFileType::UuidCheckpoint(_) - ) + matches!(self.file_type, LogPathFileType::Unknown) } } @@ -357,10 +354,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. - assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.json") @@ -377,10 +371,7 @@ mod tests { LogPathFileType::UuidCheckpoint(ref u) if u == "3a0d65cd-4056-49b8-937b-95f9e3ee90e5", )); assert!(!log_path.is_commit()); - - // TODO: Support v2 checkpoints! Until then we can't treat these as checkpoint files. 
- assert!(!log_path.is_checkpoint()); - assert!(log_path.is_unknown()); + assert!(log_path.is_checkpoint()); let log_path = table_log_dir .join("00000000000000000002.checkpoint.3a0d65cd-4056-49b8-937b-95f9e3ee90e5.foo") diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index 565546d52c..e43dd7e726 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -259,16 +259,10 @@ mod test { schema_string: r#"{"type":"struct","fields":[{"name":"value","type":"integer","nullable":true,"metadata":{}}]}"#.to_string(), ..Default::default() }; - let protocol = Protocol::try_new( - 3, - 7, - Some([ReaderFeatures::V2Checkpoint]), - Some([WriterFeatures::V2Checkpoint]), - ) - .unwrap(); + let protocol = Protocol::try_new(3, 7, Some(["unknown"]), Some(["unknown"])).unwrap(); let table_root = Url::try_from("file:///").unwrap(); TableConfiguration::try_new(metadata, protocol, table_root, 0) - .expect_err("V2 checkpoint is not supported in kernel"); + .expect_err("Unknown feature is not supported in kernel"); } #[test] fn dv_not_supported() { diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index ee27fc17ed..eaeaff3138 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -123,7 +123,6 @@ impl From for String { } } -// we support everything except V2 checkpoints pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = LazyLock::new(|| { HashSet::from([ @@ -133,6 +132,7 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = ReaderFeatures::TypeWidening, ReaderFeatures::TypeWideningPreview, ReaderFeatures::VacuumProtocolCheck, + ReaderFeatures::V2Checkpoint, ]) }); diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs index 2b1bc1a71a..0210b4467c 100644 --- a/kernel/tests/golden_tables.rs +++ b/kernel/tests/golden_tables.rs @@ -408,9 +408,8 @@ golden_test!("time-travel-schema-changes-b", latest_snapshot_test); 
golden_test!("time-travel-start", latest_snapshot_test); golden_test!("time-travel-start-start20", latest_snapshot_test); golden_test!("time-travel-start-start20-start40", latest_snapshot_test); - -skip_test!("v2-checkpoint-json": "v2 checkpoint not supported"); -skip_test!("v2-checkpoint-parquet": "v2 checkpoint not supported"); +golden_test!("v2-checkpoint-json", latest_snapshot_test); +golden_test!("v2-checkpoint-parquet", latest_snapshot_test); // BUG: // - AddFile: 'file:/some/unqualified/absolute/path' From 9daa09febe1444ecac31d127d37492728f984bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Rzepi=C5=84ski?= Date: Fri, 7 Mar 2025 23:41:06 +0100 Subject: [PATCH 25/38] Update HDFS dependencies (#689) Updates HDFS dependencies to newest versions according to [compatibility matrix](https://github.com/datafusion-contrib/hdfs-native-object-store?tab=readme-ov-file#compatibility). ## How was this change tested? I expect current CI pipeline to cover this since there is a [HDFS integration test](https://github.com/delta-io/delta-kernel-rs/commit/1f57962207ae6256e7189a9ec5aa13be33d734f9). Also, I have run tests successfully (apart from code coverage due to missing CI secret) on [my fork](https://github.com/rzepinskip/delta-kernel-rs/commit/d87922de6556fc77ea584dbc9b11ad64ba243278). 
--------- Co-authored-by: Nick Lanham --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5ecf9b465c..6158709b9b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,6 @@ version = "0.8.0" [workspace.dependencies] object_store = { version = ">=0.11, <0.12" } -hdfs-native-object-store = "0.12.0" -hdfs-native = "0.10.0" +hdfs-native-object-store = "0.13.0" +hdfs-native = "0.11.0" walkdir = "2.5.0" From 1c2fae7afa9844756ad946b7eb62c603b8ca9e09 Mon Sep 17 00:00:00 2001 From: OussamaSaoudi <45303303+OussamaSaoudi@users.noreply.github.com> Date: Tue, 11 Mar 2025 15:24:10 -0400 Subject: [PATCH 26/38] feat: Add check for whether appendOnly table feature is supported or enabled (#664) ## What changes are proposed in this pull request? This PR adds two functions to TableConfiguration: 1) check whether appendOnly table feature is supported 2) check whether appendOnly table feature is enabled It also enabled writes on tables with `AppendOnly` writer feature. ## How was this change tested? I check that write is supported on Protocol with `WriterFeatures::AppendOnly`. 
--------- Co-authored-by: Zach Schuermann --- kernel/src/actions/mod.rs | 13 +++++++------ kernel/src/table_configuration.rs | 17 +++++++++++++++++ kernel/src/table_features/mod.rs | 4 ++-- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index ccc96f1eb2..9a216f088b 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -888,12 +888,13 @@ mod tests { #[test] fn test_ensure_write_supported() { - let protocol = Protocol { - min_reader_version: 3, - min_writer_version: 7, - reader_features: Some(vec![]), - writer_features: Some(vec![]), - }; + let protocol = Protocol::try_new( + 3, + 7, + Some::>(vec![]), + Some(vec![WriterFeatures::AppendOnly]), + ) + .unwrap(); assert!(protocol.ensure_write_supported().is_ok()); let protocol = Protocol::try_new( diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index e43dd7e726..dc065c8f8f 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -192,6 +192,23 @@ impl TableConfiguration { .enable_deletion_vectors .unwrap_or(false) } + + /// Returns `true` if the table supports the appendOnly table feature. To support this feature: + /// - The table must have a writer version between 2 and 7 (inclusive) + /// - If the table is on writer version 7, it must have the [`WriterFeatures::AppendOnly`] + /// writer feature. 
+ pub(crate) fn is_append_only_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeatures::AppendOnly) => true, + version => (2..=6).contains(&version), + } + } + + #[allow(unused)] + pub(crate) fn is_append_only_enabled(&self) -> bool { + self.is_append_only_supported() && self.table_properties.append_only.unwrap_or(false) + } } #[cfg(test)] diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index eaeaff3138..5074807c92 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -136,9 +136,9 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = ]) }); -// write support wip: no table features are supported yet +// currently the only writer feature supported is `AppendOnly` pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([])); + LazyLock::new(|| HashSet::from([WriterFeatures::AppendOnly])); #[cfg(test)] mod tests { From cb674474d25183a6fb8377d73b6bb316ebd66a95 Mon Sep 17 00:00:00 2001 From: Sebastian Tia <75666019+sebastiantia@users.noreply.github.com> Date: Tue, 11 Mar 2025 20:26:17 -0700 Subject: [PATCH 27/38] tests: add V2 checkpoint read support integration tests (#690) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What changes are proposed in this pull request? This PR is part of building support for reading V2 checkpoints. https://github.com/delta-io/delta-kernel-rs/issues/498 This PR ports over existing delta‑spark tests and the tables they create. This test coverage is necessary to ensure that V2 checkpoint files - whether written in JSON or Parquet, with or without sidecars - are read correctly and reliably. This PR stacks changes on top of https://github.com/delta-io/delta-kernel-rs/pull/685 resolves https://github.com/delta-io/delta-kernel-rs/issues/671 # How are these tests generated? 
The test cases are derived from `delta-spark`'s [CheckpointSuite](https://github.com/delta-io/delta/blob/1a0c9a8f4232d4603ba95823543f1be8a96c1447/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala#L48) which creates known valid tables, reads them, and asserts correctness. The process for adapting these tests is as follows: 1. I modified specific test cases in of interest in `delta-spark` to persist their generated tables. 2. These tables were then compressed into `.tar.zst` archives and copied over to delta-kernel-rs. 3. Each test in this PR loads a stored table, scans it, and asserts that the returned table state matches the expected state - ( derived from the corresponding table insertions in `delta-spark`.) e.g in delta-spark test . ``` // Append operations and assertions on checkpoint versions spark.range(1).repartition(1).write.format("delta").mode("append").save(path) assert(getV2CheckpointProvider(deltaLog).version == 1) assert(getV2CheckpointProvider(deltaLog).sidecarFileStatuses.size == 1) assert(getNumFilesInSidecarDirectory() == 1) spark.range(30).repartition(9).write.format("delta").mode("append").save(path) assert(getV2CheckpointProvider(deltaLog).version == 2) assert(getNumFilesInSidecarDirectory() == 3) spark.range(100).repartition(9).write.format("delta").mode("append").save(path) assert(getV2CheckpointProvider(deltaLog).version == 3) assert(getNumFilesInSidecarDirectory() == 5) spark.range(100).repartition(11).write.format("delta").mode("append").save(path) assert(getV2CheckpointProvider(deltaLog).version == 4) assert(getNumFilesInSidecarDirectory() == 9) } ``` Translates to an expected table state in the kernel: ``` let mut expected = [ header, vec!["| 0 |".to_string(); 3], generate_rows(30), generate_rows(100), generate_rows(100), generate_rows(1000), vec!["+-----+".to_string()], ] ``` ## How was this change tested? 
Tables from test-cases of interest in delta-spark's [`CheckpointSuite`](https://github.com/delta-io/delta/blob/1a0c9a8f4232d4603ba95823543f1be8a96c1447/spark/src/test/scala/org/apache/spark/sql/delta/CheckpointsSuite.scala#L48) have been compressed into `.tar.zst` archives. They are read by the kernel and the resulting tables are asserted for correctness. - `v2_checkpoints_json_with_sidecars` - `v2_checkpoints_parquet_with_sidecars` - `v2_checkpoints_json_without_sidecars` - `v2_checkpoints_parquet_without_sidecars` - `v2_classic_checkpoint_json` - `v2_classic_checkpoint_parquet` - `v2_checkpoints_parquet_with_last_checkpoint` - `v2_checkpoints_json_with_last_checkpoint` --- ...ckpoints-json-with-last-checkpoint.tar.zst | Bin 0 -> 4711 bytes .../v2-checkpoints-json-with-sidecars.tar.zst | Bin 0 -> 45873 bytes ...-checkpoints-json-without-sidecars.tar.zst | Bin 0 -> 2825 bytes ...oints-parquet-with-last-checkpoint.tar.zst | Bin 0 -> 7868 bytes ...-checkpoints-parquet-with-sidecars.tar.zst | Bin 0 -> 53226 bytes ...eckpoints-parquet-without-sidecars.tar.zst | Bin 0 -> 7141 bytes .../data/v2-classic-checkpoint-json.tar.zst | Bin 0 -> 8448 bytes .../v2-classic-checkpoint-parquet.tar.zst | Bin 0 -> 8474 bytes kernel/tests/v2_checkpoints.rs | 224 ++++++++++++++++++ 9 files changed, 224 insertions(+) create mode 100644 kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst create mode 100644 kernel/tests/data/v2-checkpoints-json-with-sidecars.tar.zst create mode 100644 kernel/tests/data/v2-checkpoints-json-without-sidecars.tar.zst create mode 100644 kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst create mode 100644 kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst create mode 100644 kernel/tests/data/v2-checkpoints-parquet-without-sidecars.tar.zst create mode 100644 kernel/tests/data/v2-classic-checkpoint-json.tar.zst create mode 100644 kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst create mode 100644 
kernel/tests/v2_checkpoints.rs diff --git a/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-json-with-last-checkpoint.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..dbb8aa627dfa626ef5c3df40cc9b4fc4178e5ce4 GIT binary patch literal 4711 zcmV-t5}55MwJ-euSoM+s%D}WKQBax{JyRW*$evY{9frL?H_|ejws0E(@akB^CuHcI&Z%opijXMWdE#*d2Na*c1jp|<3`U^8TMA$T zVFIQCIZWA&Z4(H4W_Ma$ zJ5QBUFs2HnSE<%YwNlOOtgfBc>7aD(w5R={6b^#&`Wjq=ZGK@6&|z_9n)EhJw%v@ zsEM8^ilSs^k!5+t-xlTdweYA`^DP|q-c!)0s_T5oK8d0x=)dU#dp}EXR7n}wMOjz9 zsDfhey@T=VYoO6O`)^I>v+GPJhJPFsiXAP-&MJeDWjDvpDurA&$j9tggMvbnmC=6o z-h(iaD?!N?FB-U#J5&B?>)TJ{zP0ePwVDrUZmLZ*Ip-=l=NNi^eu%`7bCjI(m~)oN zId_@Ga>DXi!b&{^Id?f`zp&W0fhpk^@*vmfLdwzWk+d3-&8;jI*EUD`8!Df8ZR0NLU)o6iPlGlCT1vrd<)rH?5f0}_diR;)cYBSaheVOa6mYbB>VW9by4D#$TJzI&0mOe?T-F>l$g@HD2S_*Z9X{v6xWnvg#VUJ%RC9?BlT*jK^X(4&y@Y z`o=!|!{Kf67kHNC3rxjdg0HT2kaUqhCHsuS zxVrKqw-1K{;&9yh_4P#U%0GU@woBCZ*J_@BMf?ML0^{(1ZL$9hy!=Ds^_`$yY5XIb zykGUdr(J3M2IH)oy0QD#T6c|8BA;CuXx)~7;@Qg+eU0RcL`4|?wDqTT*Vw1)Y*k$v z|4!G*TNkzjk>RR|%#mDuOaoOvm!~Fzg)0hhksjHyAfE67g?@ zarm<<1FA}7wuEMD`S)_UUN8>-_^3#mh0%B+yC7PVlOioEAr>ZOMnpjboa=p8!i7;-jI zSp$d~HFRCn@BtMjHB4yanUj+qIj3w?SV4<7^r%Cfb97D?4NzU0xo;l^5Prr+)2$WvQ)G*}+A9UA)?%)u~6BWmi z6oCpBmkVZWGSC5r5KONR*o_N4q>#Z1I0CSFvBU;jVDVsrCMF0v!blb{ij)*!C3pf0 zBsGYb1}tcGFI>sR!Nd$QQc%GK*#(I8BL@i~Hl*3fQjCAjCTYsPS)yYpx=yqdMY4xw z0-v_VG*%~P(l-qg7>a$3g`#3MVr9}q#}XCEj>O0_xy_8_X^!G~lAToomHm5}FzaV5 z>-qNqJ!w}@b5;HUdARlOPyAv2iA+SAttERGKho;+S(%UdcPmM;ljCWkeH2YHNNJvD zP>@NUK~NMu$IdF@pe~Ienmn+8kO&?iFh&z2N{-^$$#8`m24rjy?J2kjF#$}(q9{WrfkHPBeroV?~YvI_( zuu$uVjQH5A{Cklj*}I9m!8rVFtZ!nD=BTQk-)3fJa5#RBX8)%ulCzTpPnr1wU9MRP zj-qS^qP_PM?Jf@EYLeC{<{$rAg2aajkq80+2#|AL0dWK9{X#!(S6Il$t^cg+%5W@e zivVs{AGfZNpQ8_jf4t^tAIw#sqlIQ+r&_4JP1xCo213K$x&SFTOF2tGrtGa-U2>9g zl2}{z)~zgfEb&-kF|oI9Vd0U)BZ;-c-nz8|&JxZN%Y?mks|rpMPSTGv_SP*8ZkBGA zeoV2q*5PuK?5#N(F}c=|&Ety6sf5^Db1q<_fQdrI!rq!^fsq0urIoq%*7#v3?`-n2$>Vv! 
z(kilrWW9R9;tkQlPTpFyw6YN0!;QT)m$H*L4~m0U1v~bURv5(r7hO(PS`lmqPF7mk zhwfoS7O0b_+tO09ksN&}Csp+)B&rOfqpH!XZc8h7E@+*!w+4d6bF!09o>Z$_+NT~i zYIj;y{b{F0uTm`_Zm3o&)c$JwW22w*bAzKlkxTn8%!}9Q)G!fh*Z0S;jZJZk6uCVy zYxA?O_E3`)xji5DG8v<WI9wSdXidWXRtNQwsh9>^-Z05pvolZw|2K1yYy8=-+IXEQxqJnJ`@=bW#*heC7tfH4zDwF&MPHZXDIeHl-KFh=)AS6`jgk0 z8Rdqm(>Y5yNj#Q#B;hRKB;73CB<)z*K}AOv9aFN{Ov#bO6oX2ra-*Jd$|y4;BO*zX zqyzE_7zDydjN?QHr5zFg;W!Y4K@36|4l>3NLI@#*5JCt+gowxp$&8R~0e}Yoq2ohs zx3_1g5MrL?<0&{;0zQld)D<7AGs!SjAwoJ(vYM&L6LU%WsR|jgOiyZrabL?Dyon;= zK+ApbQ(g(007b2;0-@OzcqLq=5D*3B7gR3i6{S5qN;n%>C9&f#yXx6)4T0p{~pD>3-2`#&tuuVv=Qj<)>0^dD;VL2gmAcmRv zoO?1cCn7%4kX5XS!Gh%6{5W3inUJ9pQ<(1SF)qQo156`szO;6&+ZQb+xnI%wo|>0d z)yzx#;Q;e8p~}|A79SAkm$9lzX-L@DuusElLBVrnm9*!z6f8KGRzsu5D1ie1I@U8I z%?cpUSFtx+L@R*;|2j5i{z$z5^8w>p88Bx+=)v$hk&ev!VnrpE?ZwUiDqt%m?s#%d z){abOe;oGEu=!oS7xKbBOE(SU6IUjn7itmW4>Eu_uSMuJ4M6MHHukpgCGVv0o%b~f zCZ(41j*9nMu)sg&eV%-$-Sd)E~6r0Oa$YIryXshA9mYH&xaa5}E!lH}Qia|cAOBhTRfG(8aAc8*%Ap+C5 z`G0*Cy3;f@;pTQ{-1R>dNMW#@4E<)tldfz?{b(PxJtPYA@pz*l;!ZY=45yo@c{(=22{k406w@C^nEPi!B=f5VTN zYC>S(!k?g%+vNl{vBQ+L6BpKLVG;v_nd{PQD(a1N;}JBhnaGS;h0J%$h8%KcsG#v3 zwt1%F`5d@^fF_nlWbF|&JA*;vp6TU84Et%oh)wUlYZ-khJc?TT;Tc0QPEaj2fpzlM zSkDO(r7nBfGzqos*U?kGH zsrVet<^guA1LTka60qB7*54N4Qt;DHL0WrDcrM9O*nm^w0nXsaiVA@2SHg~}Dy$;l zW1bB7>`;S-XzX!wtei;E86j0A9_{}>H$&~J{F4PXfd09+k_G-D!10sgD@2~Il*)3vUM zq?|Z@LP0oE$}T75U8f^Tyqr-U4vf6H9MGX-e#H@$V^xMEceE$6ZNx3QOf!=U^SFTk zg1lTd^y~xC5`m)wIvzH!$cFLc0GbOxQ2v86=4%HOa07s?fcppe0|=v;5CddDTS_7W zIGv0ED%WEJe(-<+pBdm{R6rq#!}J)?pUF6&84ZBS0WB~Z@P0nt3jkzZu7?36!+?cA z)q+G-MP2yd#+ru)!1MvaE`WrxFoQAHngPk-55P7N;JE?7QyP94JwW_0W@nig1)w_? 
zxa`5EYWvd+PHli0@#}HZuw*kMiX#KDz!UjgQ59lCKbbOTD+ldUvwErtsWO37;?GR( zw!-CSPrLoGS2D3J5wH#0Ydwq_ifAX+vqf)!_G7USk}}T>`EGcyd~S)Y0SEGmmZb4# zw>zYR@hg#SUqLMpq5vTxrqBW&kp)8y7Kj3e06KIFZ&^Qe-U1TnOI?vxZu!TNimcr_ zoajTSxD>$DzWwuV3g1`l7TaHZ-jCYO0&9aN$hKSG)gMbVfA=aQq8Td;PQu!856!P6QpWjfztIQ~-|Y>mYe#(f|-ta83JepYdvOIA+f^)!&s@s0f;7a z!hohc+dTpM`X2pIr{3mfKu056j~UjJ4DjLi z0hHtutz0izkBYynS*dmmO~hU+0LjKUhXG+co>2qF0T}lkH9+uiSSU0lF}s&(w$OEH_}RX2uLGH z`TBp)r}OR1Tr+deT=zYPt)BM|Fp%f~{Mc>8wUJ3RN_0F;M+5uaX>_AZ-?B-%6cr$J zdg`=Zj&c`&$2ee5^&Gx4Rl7_rWqKKm5gzaq%KeSo&;sSWm^hNYOx?yGEi^JV=kfKbt%Q&~o5w}YY z&2CC=v)E8pgK3HgvX>iW6n%!y^3@3jcBrY%mwrUE(3ow4GfAVR!3kGPFPQ z&t?41%G28CRX9#w{+V2HnhEHxvE^NIWoKJ#(@A4O&lV z>_(|`z4)A3L!a?vKAe4d#D`psHIuaxHkgcS_2F?Yvbb0@S90w{`bFN>Zbtl>?3ZoD zQG}SfNA$F{h@hfl9joCNR(yn_qhAd@kzJ|ii;dc>tgKFh5Ln=as2;7fqkUPV?|3 zqMGS@ZHVOJ@s8a}GA^JIUjIHgr|D#p&946wYDhyI{&ZtZ;WMe>V>|6M_R$2MZ*FLo zaYE){X+4)*zBx&ikE?^?lF}X`{IHtEyWAgDi@Is<9k%GCN28hwX^?spj6<3&dTLkl zWFqfS9hVXsG|g^ulxDnYkT9n=nHu zc=BQ^QAw2rdCqfU98-eF37X4zYM)*ZFfB^l^eMct!qQl74yvh*f9>aj480Lf(fz#E zQ;{E7KO*%qKSr?dE{SyI?WHz8lRsl->lqDZ?rG*{nmYOnXLuIF=#K56nxJSDxr|~6 z7006=op>&*V6Sw-E8{2o_6j0vbIlVTUS6ka|t1h8e$q z7XE28`e|o4++q4wP~!4sLm!bH&I1~Z+xu2M^;d%JWD#QK*Yi@ghYPJlfI>eXMRtg`RRf{FgcPL;|`~#+JZ*V!hV)(*}m2|aFoaiFnAI`XnZX& z^v7_hY@YjXxndDgv9~vdcpOB%5IqVlRQ6oth`Q-&zR-SH{mr&8#yeEwa#?HrfbVMK z+TBzt+(c@TczWO^A-he+^oydSMAV-1dcXCkRHMB4wn?C||5n3=MtC<1&LfIt&JEmJ zklUk<&-w!=cdJo1Ydz&U_x5|b^^O;L8nqt-<0-8?9W@@|Ib};ZQs>F(?$?tJ8CSQ{ z3gv}fHdg%DX`P6f%V#Vu)Qvy>75XTOQLaaPKV#ut|9j0vy0QS-II#*Pb73;@u|>tQU686W>H$R`!Y>MHd zx;{eOmiIDl9x+7TB>Y&E9+$IjmuXFJbX!Ygo*<%@xAVOL?@FA1j|iCQ>O5e+6j^0X zb;MWK9m{74g&Fq&ow+^Xr|PNo&XUS!EZ#=)H)lk}M1(&? 
z`NjIhrkKc@Jo85}U9frNavU)|-{|HD!${=d6LGQ3vd@d`haHdcS~h8(zKauXPib$> zk;Su6m%Z9;Tg!XWV@f$M%dLZCWqwpi?9GdJxJ+@lO!Lf7+5g;VH#dw7N?sUQ&4%KN zrQq)}DoyatOKtz{C@!U(7WhKRTt@t|LGCsg-+~A!Jo1J+>eNbnJ5o-iblFTks$*73 zYT-?wN``~p!k5^H2NbfO$mH;o$sVw4p&;scPkNU^3_!oNwN(LK#gB_ET?&;S;>{?$ zk6_RIZ1Pv(tO$}^j2u0cT$s88Z-HK4nRM~v-`*~?Nw&^I<;ocjZtXKdx7D`SUb6?n z?F&6+zb}Z{j3XJIXbgNHe@?VVsG?R&QcV7v`#Hy+lJn6%$Cz#<( zG(|I1DX<@7lwav=Bt<1$>YciiKV4x^hA8PqZyNi-(#MWPF`S6)2|Xn=hBhZ0T-_Y<}3OO@62DHaiTz=$W7T zX#ArZ>RNVc<_ zqwRxdi7!-Wc80S=N`6>Z51u|hi|=Jku*qB`Z*D0XM|^st=XUu*^L-2h;o`u4P>Xk1 z6gJ04`ORJ`eIH9LeJ)4Bj5s~jquvJmvw+Ca+u!Qf)MGa(RGbMn&nlATi{?^ZFBRSs zs$2`YUNxrMpv>m5NRn$IreY>$AzR(LJ-Mo@hhyCZ-Pq8RRrRnUM69l#y0sayh#Yaa3|a0g9x~1Tc%|9H z0r?b6`*Ia?T3y6>DNp^SjdV(=bwY{n_TiKJG=Xbjmmfz>a$g_6CBVzpyaTsvlY+$(z4Jyj{avgdgL0 zyTT4L$^Fk+S96&X$-R$o|J6INIH>c5C-~_nY~tlAHvZhDr@blfM*RhU*U8*2$X}=D zU(~jWb)~lz7$2-!xz-EIC<}b%3$G1-d29UTzSQBG(){L!i;r?7qYOWf-Se+S5>+P2 za#4Ki4`?1F_RBGU`m{tMlCjvm*tUOY97-KVm+ zfoR^ke|RQosb$0#MlS!VAk64UHH12;=2?To0eix^_t<7PXI?#NU}z$>$`8a*>%=lw zi-D~GvHaDiIN1*@%G5d5SdZg&+1zQ{Xr4$8Ey1jS!)G$UOz+ZgP)sHwFS$oSgOFSp zD+f=a$3TZNf{c7EQfpFm;e+d4o7&w1)la)=+pM1%X)g0Gc~!HS128Smy@P)q8AcGNK@5fX$q<>>_o~wrSc_IUsaf< zIg!*m?*K{RYh%ZYdgtL(!(S(aQ8Ia2_AK}xN=7E+K86XyX??o!9OTGFWrZEd)IaC- z`V4>nx|d}%SvfnDEU*!I@|$0eSBJM%vrSXd5l<&&O}3ueMXj-@HY3n-rejV?>Xpd- ztoC=wqnDh=LE(hjW^_!E-U|z3C9FFI7w%D4!fod})0+w{j~Zz1@QZnD!ZG-4{9I0< zqzX;Ck%Mg9w<&6OS61il<#Tp|d!?TDVl$u&qGA}T1W|@6=l01-uYKhaj01h#F6sR= zl@5pw8S-(kKj%8JU8)`~f1-PUHl@gc8*vR`QQ3qTGKHPU7&BE+uQb=U^~PxJtnkZs ztVg{qOn>KY=NhMY91O8tAE&Z6j58Am7B1sHs&}7iEodco{b9~EoTVGt z-cB9ZlWd#1$XHD0K!FVB4tc$x8X_>1^}tH1RKCMjaiy=-f$c*34~#%9&&9#rR-m|g zS*mn4rdIW@i(h-4!7$y*do2Q%05%MTZYG%BSK=8;XcaEKqrC2+{^T7s;O4;_uKi8v zFoHinJlH0r9#%FnOR%3I`pPMfw9iS(I6QDiv17V!U0qaC!;fA3IgOT*X z%(z`Biau-Tqnib%0sZ3fxmWls6-4?=YCp}<oO5%b-p2~-#e zh{LvkgXOVZC}WKkydd}c-F;A#ge^nF|>OfkR)7O8o5bO!^R zxRROXUui;KlXTQ~YBg2*3V4jQIs+C;$@)N~{ig%KXtpH-Z8L?jIh_N$jrhQDFG}!3 z2Q2d|ej^>w7?-+(V{lPu1JjwEH-{vQn|WajzCdnQ3G|24s4_5tX)U4!KNsNB_71?4 
z5W=^B@-VT19}!A}9=VmkoBBvQ4TxX(arb2csEcAJ0*c!Yp8+s!FED8dP(4t|gVgOT zHJHu;Dw>Ub_-NA~Dsw<|xMX@YB%tp#ZE%6wHj2*Zd;1kN^-TTb1HOj213FvPKj`|P zs08pcl9z70(13)y9Dp9YvibV+3Z(9&?KH$;=;W)=+Lds`t9lGdvnq>lF(8d}1vVY| z)=7@(R;j=gw_)Q)Se}hU$AdT-MH|)7@Za8`Isa-7676^b>&yDhK;mOC2Yi?SUef9T z=R-*k!A0gQt@Ra}^UW&Xi&i9=>wKgEc^dx?YDwP46^A01m=(ObE_P~7~?YT9~%n3t$* z`9`8~t?guJ5w#g79Y8OXjaa<;X$H`yF|J;2*uj2Ok5>SIH=8)x+~;@##J&4G2a;7e zfijywYBdf%*oSDSf#4X0n`HvQr!*GEcwIQFKGUR~y6N8zElr*v{{((~y35vap$HT4 zisO5VqbW1|6AM7NY5VhRkQlJ7M&;s^XICFGn7_99FUV3dT@qMf#3Y~OC&NtdI1WnQAg=eTFNxhd8COncnx=VVc$NG+t9sj*E*mXx#^&$y7Hn0O` zHlGYfp;hBv10gk!5AZ9z0yT(F|2L8OS0JrP}x|5<OW(N!xdAW8W&2BPNq3F0L|N2Z2^yHCDXJz2bHAiu$Hb)*DErcI; zg-1=fqq1u0i%w4?GH@4}{}KRLmK^{gw6T$?ziiN+(%+*++ac7;Q$oao=^x5|K|T&UY??)j$H6m8lQ_ClvX1`yGc`>FkLGajpBKGth&($x11x zB15qA5QY2S8`{<*FuQFIBoPMcT%S<^jzLcsK<=ex4lu&^^6e|;Xks5EKP9HF{d{}n z>ocUU+lJ6j`K&x@Y*s(+&Yv0nt)c@EW`Doev)Y{s3j<58-_$gS5Z%>ZVWIAB(CxAz24%U>^YPXh)tzdIS71Z-Gtg&w&m4<`B-6-ua|<5|p}T05HY) z*6A1$Iy#j9g#$zEHe#!;2a_q?;ykx6dgra%kAvQudiJ?{l2M%qrhHlrxKSm&oFg3Y zo=pucA1M!Euq1CpJPpum%S9v77^_U6$lyNEtyS=yI}k?%6jow`M)u`SAj_IpH>pl? 
z|5IAv43;GDA`oDuGfw_Uul3;gr*Azq4fM>lY?Q|RSO;RhDz8I9Pl+b1{(%NFSI<_e@p=#C)`^VyE;2GF4gsqgTC zx#J(=v;z4!+4k+Nv^KC5B@qkC1{EL^Xyn^$bp<9@L}}a-l8As`5R?NZ@~1YSgn51V zyZhOyjy=I2!uYU3p0m4T&kWnuq9SRXKT+e^HU%7XK<|guOl;8Le;yV3H*@5|UdX+| zU<=myODo#wU};yB$o|WShggT@fY2Xtq;jTrSz7|K{9^SH>NAP8YopI!&fO^A7SW{l zkXuR{LP0>@NAwBE)qOzi4BHNA(L+biK)j}HletuJz(6T5sc{8+1$AdGviu@uxwr ztCIbKa%o}vPo@v1)23B?Y5@8Ul{c>H568QlSwN{mg>5~Rqc_+CpSG0V-*jSVVs{p) z?*JxG$vVKRuX(uSw;UN}#-xC9n>z2f{>tMN}xr(jf&O@pitx1IW zDcQN!44OOW(LsA!-@#U;qmG+83%@eoo5SA_K*Fo}8Gxs#PjzdK4e^oFy$ZBswuaWu zzh|k9az#7IxsxEV*I`v~?A_)=Lx$RG9F}}cUz9k?q_YD?`1&(Y##8uISXp>f$UO0j z4<9Sz2`ZwAl_cI4j!Ny`u0rv$)qP1MdbrcB`WQLodSoCBRA8k5VomvvpQEgl=l}lQ z?Wnqw`i=vU6rKC)whwR%czW(wdTEgsC%m5uMOgL9dE+Z!2e)&!w6q9oXkASTIO2?_ z31;FEa62UB3qvKK8c-KXQ<%@|=Hz5Kq(VZc&JdSN>f*YP_%HF+pIOzj-)82&czK8& zUeEDQq}am@a`==aQy5z@jFI;i(0e5LT)ty&B#r3zy*ZnDkDBzXUZ=mawa@y5->5nN z;`d&`PkTN}CakiE>%lxQ8s~^W72_+j;I%;bD(V_xl3(ztfIyjhlC@|d_*5IPgJqV zKU1w9iyb&l(imC6bDvEZU^?^g{$s_jnXMv%2MPd4IF@0ur-pI%v3{uGco;p> zas7jcmrIm^x>IeziS5Ig^ZW3YyA#_}8zDhw;k(lFe!PgNUU{VtQG&Mbr`aQ4j$?IX z=Vs5-hrAVmbN3_RE;OcAx1AL*K2@80+^N0-@0HJL+!|_jXZuUnHlr7%qr09-whdEP zk7l~EK$9FsaFYaol_Ynya(53? zI{Xk#(+Sxo`kc2nqE-LCUdXom)w1uhiuqBh)P7k&q1~msql)Wn;8tCVyyZLG$i&6X z-G-M*u6Nu=wdJoLtx;28)y+uwpJ}19Wff^Zs^5YM_<5Y=Cf&XqxWL@75sb({q_UX%_mBBWnK*lH}y#8U~g%X}VkXoH8V7E)jcDc;m7P zKRu--Ez7E3DbBSYa~g0o60=o^aLRveJBRPq}cAH zdGE6sqNT+aIldlTwUi8%@O+28lMYDQjhxk+oB3*!SleRzyH@E;Lv3G$uJK5uq)~k# zD=`}Bql0;%!u6}SapAMP-EGU+{V$J;@{y)5Py38Ye?R58b6@{Rr@yEv!`|#}V;}t? 
zavHB<%{7Kz-y=2J;0vbeQpY6I7> z;!e3%z^GBcsLgfD+u8%$#f6rwqHG&I|I(h+vI$#H+a+`Qi*G+>R(z(44z`B}&~X`0 zs5Qo{Mr{+k=Id9SP3|3S-V*iSc;xb`|29eHB6nx?z#Z z0$as8X2#r!4a+Bt}yy8L$4|O zqgyltMgxXV+ani+YKzQ`B>np|4RdSq6~2)r=5Pip$^}JSg{a(H=y;?WRjDK3gfH9) zz1HrbNRKYY+tmatOty^qF}<$cZZDLOm3=zG0Y#%3j5Ig^aO$trx#MXM8^l{;=*KFZ z4eyG7-I#tkS=J3m2$;{84raf9u87a>n&0{-W$@-DFP8O=K6`j9l4y*`pJPoHPh7EJ7Zd}d&~4alhRwP-d_HunK|E4+xONQ zuCy`SDLUFw`T})Pi*$cQ`+X^8>0@*9!zGQg@^5p+SuOM1710_Sv;hN8wH$Nn`e>Ea zC@hd+(wqTb-<1_?s`8N2&W60c!s9u?5p*lv73--iHNu!r^uC{<4}m}S(8+0jJter} z+f1x-BerwvK78Riwy9CNL~nCT{i#Sg7haGLA4E_Qzc`AN^6B$>oU9Z@iT(QoM{0as zZ$fSh{i06cf!Di~3uW#N{;2BA$`*AgyT_M?h3S(bjNcyd99t<`8M8C7v?~HOA%ZLh z9DIz@cF~KX9`8xA#1(p|9Uom}Qv@CZ{n@YIV@Z+X>s=fC+_2VU9C-ge* z5er)-IbEZcsI``*Q<}z&mrqNbdwAB@J2cp6lKj`BslCwVTE8!ib1Z8M<{XL%aqgkm zSzN1V&k%j{dF$G^eC}2A<>u+{#2SdjNAfE`MjjO5b0l#5*WrvcNp((e8Wnv3n-X#b zX43%yQwbEz{6kBt`)2O1!&kKXM|EYquX;PgNB2~Bri>q}CyKB=)6M8nvK)WUcq@$) z$D>WPFD*b~w)S1u*;3L+mNJ(mPf)u^OM4L6hK_U=>X z3qvKz@GS@m9p(T&%&CyDx;0$47cX(@(W3wPjWI9hnRz}crAL5bQ@XXEkqZcs_=g8> zgtVe?0TwBF%h;gsjJ!Sk2%Eyjg)K>M|F&PZZ#*w-^iEuPb{1z21ujMe^0_WnaZ{xX z?mFR-+2ogdVtZE1;r!C{zSS@0jT-gxM%Fdje#RutercJFNony@{`D$H;)klc*%>#6G?UPV+QSEPCd>NSpQqok?PZa(znt#kvFhWF{TEV@Fgd7NY&+^HQ zg)E{QjYQCsg<4=yh9dvbP85(22o{RI(}dXHkh{oj;agDDvVYB9`05n=$q7H(kEH=! zuJ%bI?m?_5h|{xImKaU@5Sf-o|(KWKqj?RDy&{sQ=4i!XpDyBas? 
z_)Vv0utLeeWFq@PA24D37-n()W(2NiPK=uJ5B^dNAh4C_hi4%E>QAMl>-s{*I#}_7 zmVZ2^N#KVnaHKh70)GhrleP`mZp3=qXoLk{Vkf9>%D?RgR+2&}(Zsxj1F3UB)ez(` z2)2xwJ-gZ;wjt7t*S6E_H{I;V0^dt_Tlz5~--schy;lf^^_OIRNjOELowl&CJHqU7 zh#kcLaZSxrOxy<;N+VYG>3cNvbnq6$4)Q;UDS>HXjpI~ZFutAx9>Vfd0)S4Ap#J|v zk3&zPdwxfEkWOAY#FQvzT>{B0{1yd1Plt)^s&PPA_|Rl$ z3=n14O#;nX#OhSa>IPm73KA8j(J84!DyQHDW2agu?KS`nJ`bV6`ZvBng85;arUnGa z^-HCOJBXY}IBQ#FhZ5(ml!C1AQ*^MVSPNjpqa$Obb)<$17YX|txS8Am3Y0>29*n|| z)8I`h(TzZlAvG#*7=A2DpB)d=EC4mdgun&IQygFz{z3~Yd^K$uQ)dT)4n4;;QBq?Kx0kZDl|U z4r?XDo?8aq2MU&K=(HhjXt!Ma4_YYP=dpGs<1=3TCvDob$3KX$FihZ}c9 zIl{7gtt8FtV7JdA99yBgJobr1DFh_r_l}??Z=6{$27C;y`1uEt9WFEmR01M*505Xz zEYyBHgc}qpqQ5!=K5sx>lAsk*amcfw>^T@-dg9E!JbP!uYK{74UDW9;#>;RB$pT7JwHg$`Z*ov5qa>vQmR zLP8#p$oUY^%EEXZjpw#5SjbCEYFtnE<~lY5K&tNu)cr<+`-M>Hg*8PL%@$Vv z`5++rwFVOI?Q0AXY2h`bvA_)4m()H6d%Ff|-j_Bvf0cH)K7 z^`?*I=WQLwBBY+wW&sGtAbP-R?wSFiMdjOL98B8*$mWT;7s)33NCGTJZ0cY^^i0S! zBgaVGM$%iIz(Utb-3w{-(6>Mhdb6R{Uw3wM}QJa&rb+H~=A5w9Z{H7u|T53s_IO06XW< zx;OvkiUNlhdI08Z;2Bf`?DUJC(mG1Xe2COkr%{=4hc-J{$5JI&Th@E)TUO+7z_;2bLuCrttBXbSuzZOi6%c*|^Eq!e8L>W~ zLF+c7-tUC|M1%e-hQ{yB!$yuAfjN;7#;MfHrqv&g-`g|A7e0=Q4gxf-PB3d{7@H3P zHyFO>4LUgi@O3d@T7@U(sdt-`H2suGxV3kiAD3p}&y z#qo3%0{Nc7C=SPrfLQGB0}SxnliO6H>^-5s_(?W6?Am{6OwI`Ebnc^ltM8uj-sa_W z*25^Yz7NB#t(0&?P{v$fzYKXT7@WR<`P&3ZY2ib`f2Hfo;%2oW50 zR8s@7nZGLs+S{jyeM%&L^%91@s{lwU+CHwBqDz8WHZ{%Wi#ehNCI} z7jR4&J|GnQfPQ- z9TCa|=D(%~DY_BIBoqjCYyvs79mH5+0Bv9oofw)&5XWJUD1>_E{(8-Bx_pgu+a^-3 zt@Z1aQdAne7PC4iy2vW4Xiq+Y{z3U&Blq>zIJ7{}$~X>M45@)MK=9=k+18zYx0P8> zM;JXy>3Z3Fmq2T3&^Hl(w5;uw^+wHH_iWnv;%Ms&ys*63aK=U}phoPM zw2BSkM-&?wA(iFvU`XW^~~K=Rudfsb-AXP*}z)hd1~(4c=$OfTjW z`QNI@r?a)P)j`#*!LsK=F^_n{ngmX*E6#V_@v9j0W@O1|O{upJtWoV2{-5}+gFC}) zI^Ul~^^11L({z8&F||DvsrA!(WqrC~j~uZ{WSYD&bU2??Sr#?^x|8np-R8SqWs8a6 z!e#N)w6$Q>;*IQZ#W#jJoo#dkj9a=Qn+hkT?#fSel%oR^{=C;zzIK?{OGb!tLbE`F z!!-QeQPy=$;q}6kV{AvHt8Uvwl%wv_fzXV0@|S@b2>0;PY1G$$qVT-nH{CCH5`qz5 zO)HC=U8WAwlH;guHS4{@nR+nB-G~y}5a}KonK&Y;Z4zGj0g-)oR&Bbt{p>x@4|z}M 
z9@n56P(ngN4Jb0FXZ)Ui{I$OP3%=d!gJm|XmxtBX5yYZdA$LwByfqT7!G9y>=VCBCH-t zEFCMNoYn615kkbB$U5+0F9NGi_9Zn%_lfH<7MEzqcAs#}CRLxzBZ4SqIY;XpQsQtD zJ9^C?(TNQw;6M(#k+QONzjpOVmjB3yztZrk?V$|I9(nq2^FPnE+H#9Z3P>BISs6+UA!rk?kKux+A-|B@uHL&luxLhJMzigUx>O-?tx`d<=Qjl6xs6 zcD(%Bd>0$xwM^^udvAQdZJ30OybPa4+bb5#+dRnN5fKqFH4v)$T2sVbE&1_Ie{6~?L@KQ=7v9Qa0U;=yi3n&5(7GDO-1EI5&-F@D`S` zdl?$LD%%A#RzPDfMV%jK^vX@emRJ;=DzybVV#Ld&UK znkfmr_T~hF&ISdA@J8dQHQsRll2AL&#mV7U3rQN{2{%O$zhyJ{;|JQ}+iJVf=S!yuL- z_W)Ff%6BhsU(EdDZxgTq-8@GmRvW7yE-X_Y zzMMXM<8oKA9dRl+(_+Hi_V&V~(f$@vYUDj$ zG^sRkz92ExXbWuHZ+N|;yHzOU9m6cN>@|kL|D?<`&3*Isrli4U7`mig zTROK_BdGEAYumIGl6SQJZ{6TqYEna=-jk2rFDd#+XoIPg>lVP4=RK9mj702r*8#y@7~-LtGrgb zFl;{@G_CNRdp94ys>>j4m*rvD%X0v?$X{lh0d&Cpg8 z#jPlf&(3M2^UQVb_4RU?dc2dh$gMZ2qnF&8F#-|pgfZbF3W)kX6k~K?AHgVtjr&xX zU6JqUG&b~2a|v3%cB^>L1*$O`bfA&vBsIEZ?yPs*BS$bad=5>Kv6n2z3FRr8(vxR> zNSa3aDVRkGC!YHMnFtm|M4@j=AqhD~rjNzl9*My3{YOP031QOG2>j;=ENNUsD2p2# z;wKGc;;ju#^C?k^yJa14Fu^Adi1qXAi-kaJ?O2B42%!*t;8Z4O4&<&Q0wG{j&(xD1 zu*-XnUb@031L~WD$EGC1sagC9qOg z1NLx~p+H?n4djhxNG;F>4YPsSPD6q*BWpB-2E}zm)Jw$S;RTJ*s@*`GNLiSl|04^a zP63uYnm}X$v!=#IT2y5_0eTdT0vKa@F?5>ukQ>)(WLSj_nvmb~L)|Hl%EaAYN7Q`8 zFb`eq+e2G7eL(4}EzYejBYLC^=I17;j?~#8V3YhcmL(y&#gQBH`~e%Co171Z87xi# zQZ$Ucz~nBKT>KPsPpA0VHkdj|s=SflELbJAFkm032DWy~|1j?js$VVCr8DAd#Ls5L zz)DX1&`edYvVlm?fPaFtzuB5TAY9e~qZhq~@bh8Ak3(C)DIuwBa_meZ%(Gr)>}>I> zIf#2vS|@tklVl)Q4U`y5e>g*RUJ8OL5M6V@l6^DKM2G{<%D6FzvJ*n<<+9aageo7x~l!=IeQ1<)ZOoW?s^_@Uc zuAkE!kPon!LL$wmHMw+!kohz~UOEVo`av5TrFHek_!I+=|!q&0SZ!=9f5{VQOUIS2-68!8h zw-3h|(r~cIJBfbq=0I%&1Hh5TB)Cp&bf5rCWHl%lOA5&Eun_{ODj@GfHq9fRGnT-2^(HoFtQ`lw^Axq^DyyzGgj`@{_eOwQ=JAqz4xL7js zYm>LYWBH#tz&wbfsOh^4z>)|p0paFjt;d+uqNkY@d(YMF#Ca1|x(c?i;Rf5C^#hZy zI!`H_c-thOt`ZWt!0uJKi8V>GCK)joQ_=qr;1Im|>jl!_&XcE1o?tW3p21LFWj3%) zr)Ey@r9&;O=ZX+9R~Zr{Tp7Q8!fgfaOA0gs$1}g5FBZWf!`D|}<84%M1u~o_y-P?) 
zO294T_I^~!Y|9nQ(Sy^!-XWkLPZN~_qq>ISEFrKs4n=)ntE<2XG<*~@Yne@8Ojfu5 zm~I@4DiRvYXJ8<-%1rDJ>oFEwm97;$e?_5`^RuLCoY{Y-!_)qXabvYa1EhImXITp= zr`-0^2SjI9AXPGqz+*h6h7s2%5Yh+#tlw1=0QmcKfqonYOfCekhb9E<;C_WdA3P4N zfdHQEdcgc%M7U2Yr?fb%>Z=oIvj(}g1lwouu7L1!5;m$tBI;6_D&W ztTS8y*q1)p~*?M*V>E*StQ@+U@8t$M|6-%)%HPonc8S zbG@NPKyPru97rplw2O!p(jyrdv3)gbUtskmaq??ey6rvGOXxpXoF|O~4j2TVwuOyH zLTo%|If1trSa562Tqk&=H34dok^_wCjG&nlaGgdJn+U&>^};?rS%L8UmJUQ9#Xf-* zH*>%NS3M_iCgVE?Fz*tg`$D#!SAiOQQrQtgz{3Qhs{e54jpOtHGRVLBJB8?Bz%tk& ze=GV2==ykg&qa9VFC5ZS&-uUgu@4s;fVnSg5x`cV^!kz8c4U8BbTWZjWYslVSu+ zLmlS;tD#2^v6$-#V^LK`hGsrW7^eGOZKN5LbW|-%=kb^1DJOKNJ>03s znIO9noOZ&*Dv_WV5h8+8klm<1kPm%0fXf_EdJ3G8AT6vbf%fWQVqCHsMM78>;VTp- z6!*LaLU$Le3)~T84a0A@aLGc0#GeB2EJm=ngGjMaH4Yy%PZ6RU*AGM^-N@kTY;@z! zWkPoR%!J{dFCKb4ySWc^^2IpQpg-8+ayB=i*seDwnp235JBM*8Eg{GgakQM!o4{_E zSbhC}uf>{wI2BmT(3%iU_k_;7)eyE?F9P+IkPqG$px!2MeYOC{b(;fkMrr^bg0Li_ zDuNvlJ#kO8%XVdODMEw1WHk?@+^7VKHZ?3ZVz17){lU^7*VZ7G(e}vl0-(dT1*9Uv zu{F8Fx#(hbSqU{6@o}v!)q`4_J^AvPuVF8A4q_cE3vzMY1h`Hi)Yw40D#D`bMx_QK zF*!w4hn*cUZ~)4MZW@Ec2yNri3bQBVG%i>ltx{TKM=3>aTW(=g?Bh+3AmM}Sb_;jy zG|O5vSOY`wSa1v8*>0t-pLE10LFR)Vamom?u zvJXaOJ=e5D5B|x9f6|Pi?N4=S_EwN4tZ5Vt4z}k$QGD_5u;$3F4h!GF03<(Yf-ii5 z_y-gXO@mq`gr^E9M3ONm_=K`M`jY&I&P*X5AZNyX=VJfSrPpI(yxD|9%=8kbNb=Zp z_zW8h6HMTfi~HijLNKjR|FdcY1t~OlfSgrPk(llD5fS2*M-j*4#5e>(xx@folJq?f zj){XtQ1&eri&+|Y8+aEM9##d!vC*C$0=|mcRpSufmtUE_32yd|*Go{PBMld0pKvJZ z+U|eN%T386Sn@g&`OguxZq@Txot{Ttia=EhYt#KF*vgMEsBe{E`47$TnRvi6$C$>Q zroMHt-lU}CMY9-rz%)%aA*pl;-%S)fF9S8nNV>2^DMZE;-VeI>`56}w6N2N&$ zVMIL1AA>4-J$ye8&CIRwJ!h1(fV+K!5@!ki_CdGc*_IIg7mhAm7SnyAnppBeTl>Yu zmv9}e;gD>))7FYDB43Ybh_Cv8&zCcpD!sWY1B=u7D*A6Lc;@P&j`4h21+++mUm4_G;oAueapud#-!YZ|Wh7Sj|j8bb7X= z%g3r+u=P><*w(eB)y%10%B0-))2F*eW^gqF-E#ysgP-;G0O7<#0R@|pe_rvK4r}MV z@>T3YtY0(LLQj=<^`uQCnoLUOOSUemteWF?bt(?S0bm)_3L%^}dd7)VGN8=I$A5uM zs#WciP1--5Nld>B1IrunwTX=b20pz%+<)Q0PK1}Ynf4;zdJ|w`Q03x-$Bf!9`v*Dr z31^5tWpRklC!!Z|jx&)i2>&Qln&b`DR@huCI=9DDfap8 
z5%Pk@&(7~bVK1Q=9#RU@iJwLcr0MFw_n6PR!|AVOm&v#_N{p*bT3$8lz{>u4h{I@7 zwHo$7nu8vCx$GiPKBInY^o+gICt#_&*|$krR{w}`S<;D@d z=Vx_LYkDwErt{6@@2}UtFI~jmG`@BGb#lIV!|rwGu3Y<8+O)K>{^BsBd@OB0R^yl4 zv+Aak>1(}-OZ-}0GZjtE>x&X7&!5@}rBS_j-b{fnH(oq06MLwscF(MTx>rnf{ZjADqMsMXI*H8+fLe{^J!iQ5yEIj&W8>8Yg@_*utZ3WpZe+lsU{y9ar zCvATEKLCzEaldtvRbmqNZ%sM>o!sPW=f;~-ZKTQ9&EWP!|E1JMntY1MnGLk%Y&Rxe zJMX&N+wZP1`Dx>nnr+N^a1_VTMw72iagRyd&uxLAhV(xeaq=bi-x>FC=j{8e9pp32 z8&5IJ;S9qM#P<7ORmLQioM)0xF)5#XKta`*luzVSuD50zjsuwGzJD%?{oZeK`th_m z>@(kReLdQ)IA7>XYR~hnS}rXx$m>jVUZm z?u!CLX()hc`)!&%|GoPU&aYNI`@erE$Z6Ikbk-s)xseSo^l6+(7flW%Ti7D3vg;pi4sRoX>+ zBpH*l@ANB}*>IzLWhl8+MaJY72$sm0{GknZ`?*i^Hy=ednSvxmNq#1qeBHe_-Apu} z!+qMAlTR^e6KhDhkw&wD^2uIoE6rL{I(g*WA;Fp**0>SE6I*UDN+AmxO{{sN45K14 ziZ8YifKfqAgE<6fQ-B&=ieP|`s0Rcpx9idCSw)Md=NOzfdwgkl>BYC+yL;DmXDWspGNU92 zNYSw~OaUm)D9u@gKxx&sIz`R-+J`PsY%Q`V;SGb6iHy;8AVd;7>)Uy?AlNncdukr? z8QPAg7ipbc2+i#^?-@JL<{nmPO z|3Ka^4>S1v`O)qt4>z9vg5kWicTtPr?UZ(_b*^+@z4P7q(t9`Exr@Dr7K1*_l}{gP zJU`#>{P^nHe}!3V@1}I~+wJ#>V5sx1_D_#5pZRJ$E7dguS>7Ja{#GErITa)FSLGa5#9$Gd9_^2M-MSxKT?J5$R^T-I`RIcdgc1N-K@>5d@fV?K$`U|M&go>Xoc#1dtrbzAnSOj7C;!G{3n``~)Z-4*aM@2fR)qYxU z@!X&88Rm~S#u|n#rWRYg`~Obf78n(H)@&gTCC$(%`2oPjn4Tpf9UN6mEtvs?#HiDE zbPh6v(2U54h$MxE4x<-fAP@`$g8`wUJdeY$^#K#W!AApuP#_Qrgkc~UAPfTnK`KBmw^BWL?VE{2Xz^wKFxRljDi2*h3KLZ-= zcKIA2SS=gS{|2Dm0J&#?X*GcD@av}DwdVkQn;p>9G$309Kfp9Z`Usdw;kWAps4ATC z`7?a5Kx|WdkM(XvF4cfn;gtY1>axYP~ZTUrThVQO9AkA z32OZTP!h&j+Gp%rzc00>-)ghu55`^z8uYNeqJB0$1r2h4(LrYv<7E0?k-cCvIs=gR z3KcM%+#QGkHT41Nl12#}P_7Pef&m}f0iI(3*a6^Mb^m~$kOuJ$Xws+w#h(rsfn7on z9RPfsst4347kIj635vfNNiYww_>&OLWLUwWvslyvm;`citYFkPv4$tiu*40-nG1;0 zfH)@`>YXrvy#xS00LmSJ43-^mSk3_4y*S`LgG3$@;Qu}s+g*P^DM>`}4S2-)fU-pz z1Tav7-!cREQ+9xeWdnE(7*OYqz^^s-ExquxaX@SF6j*p34Ng9M)}{=`gN+Y9G(CC1 zifcB2;Lvo!0O%)x)#Lvk1Jr{==gRl#X~_W$9|Tw$?sk#$0nm7v)TwS806hb2G$0Z# zG7a4UCP4>?3P(Y@ZUzi+m+n8(C~zY>0LKi0z>kGxdIOkBz2m?ko!J3mpZqNVMa#{A zgnN13=84?K7+_(u0WGN;pcDfvPILrd4kE}=HXv?efJ*5DvR#7R5CY4R$`e}Q3x@%D 
z8SMe}#(zq_ZEUpIW(L%sN{Kx9Hi7-e!sE`3MDz_v6u0Dn;z}LBo~I(qAR7>IYzu^( zISfcx>723t{2)ydVhVgL0Enz)`}|@+4rm|HU!Sc8PVVoLS%6w8XcdnyX=gM(&CbQA zyBVt|cPe1O4_%Ey(*pnVIk6m!*9D|lH%F(NpR*pwOOFWvC!ZvaUAo^}8qBvW6WQ-Fa2&CudNFatg&NrZ1eJ~qjK(3?mBAX715GSdG1 z21JPiLP!TRz{@>E-+`sXVQ@fDJ*9VJrRotj2~y}T*4>CPk(pzlJP-KSApvXIfY3q| zs}Vr5X4{u#0H3Vc0X>vEzyZFZ45L5*$|Bbug4yta`*JcbQ#?UvG1DeI*eGH*yJ>_V z=Om+boydzuzlb=80w;YE5HqIsCTBsF)ALIM6ATGIvq!*>BZwbQ5JrnlHXF(!vW?f1 zP+Qb5-kFPQw?~^amiPuJcSjij4V6Lx1mXaj^bY_Tm3}K^#DA2h5gcKz0#;`v3$tz>@v}=akuiF3|w6fB_)}Nuc5X zghX{A*QzTC1C|`6^V$I17`0!SXh z?0^erg@ORUT4rGOfDx>0zyy7O5*#oCqWS81?gr3_+<8d0&nzxKmv@!HI+;s zV7d!1Qwx*wf^iGvpi}7q$`U)E-VA7TCIhws-Jk=|%qVC9xE7>f8<5y23jqthW0T<-y)=P$We>HBHChpVwgFP z$_6Ydc#g&bNFHU`0ncO_PzYrqK-D7*fKlxx?IjaovxXKfs`>W{lFte7AO;e|{Wd)! zp1(g}$wC1y8-V0?z>i&Z_y9yUJHUWufckm>VJabE0RbhDNdU!o$KEy@;BS#-6oA{W z3IpU0s;`Ax;F!` zwhi4SBQb;K9&Nh$cP#7+2&OPb0KfL{pTl85thM1Dfbu>-206eY1`yHmXaN)~P%j%G zmka?80_dc3fQuYH=ls`Nt9~z(&PjXXz_FW*UF_>1O?_{rLBGeefWf@+0uEUR9B{J% z#GwI5S98z;@Z0ZzF%kx-_AXcd2V^*PfrNr5n5S$UfVm4WQwx)(rJp!SgxQ`xfq?f8 zKI9z-DDK3&AW$~IlqoM3Kya$f4hU=95Cru7DxzVZ^I-tvTL3ZJ1Dk=7^b(-_fk2^; zHK6-J2b58&Os}b`LHD?_0b{i_fKeoi1yEUtb=d&$dK>dVz_A?|49GQCu57>nSaTG> z*st^&0N)xn@P}0a6CCCfpi+qywzN40K<%o~Hh|ylfRbtIEdcR;2T+m^0JYI02+T8l z1_S(>+W?i#1M-*{Q~;Z0n;noM-^K@=c$Q!Qzo`N)+z-HjutC{?j}|Zh*p{gPsw5~o zz+46l8?4{Js7vT^UT>( zHlT!V1_YkfPyi7a$jt^+J^#4t2CB{PfFK~3cWX%-aE@m*$Q;$?UdZ16$D;3Ua_k-z zmioQr(Paac>cX!8?xn5l06^|GF96uWWAnYRT@0AnEgK3zr~w{uY7Ox8^I>3)yB#3a zZYZyq3mUh#@ur*40|7Ij{EMX)p|b2$3qY6$zD%qf{KD!3njzdSYm^Nzwga$#+FAff z3o^3-$9@eF0zd*V1_N;W4p%mSt`__X;9f4X18n5=76y>iGDXU>KptRrYSs!xQBeVi7o;m2pbQ&edZ-EkjBX6rhn5XM+sl>;0JtDq z*#NSm0rTV=&8JyF4Vxd}`}_m)eKsKJYHxt)?fM5$?k8S0;KUiwGXOFO;KgOYel-VhF(+?~WUrz0da9?e=Y)#0#7p_)_L;xSVwYwTybC3SCpBy}}o%2dOWEGn}@PX{X15t6*+q>xV4lETzNNONe7 zl=xm^8K6_r_ex&rsicvYW}lceB16M{uP`GnPvY5ClBkO`E*XRIicRhr)Iu6e?5K&C z^Od9m(j8SDX|S-Xvnc82+)|^n4O#9>mC>DVo9DTcJ*FiNuC(y>WM}Elm0)c)L;KUJEjSM2K1Mz z!$ER^DO+j`GF2ZyiO$)#eR#XC~U$|{KyV(NlsNpJ!4u%9<_>Ib8 
zbzzpbF6T15+1=gU?pT)DcE51nyY0PgVc6YWRsa+3;6Vsq4mo(>qJhd1G8`2Ed4v)T zZYPBBz?KE-4>i2p<-CIK!G2{ocHjQ)xP3RfTefZG-g$Rj_vP;^zpdN( z+u4@CEtlKQ-!8ZDe%%fCcfT)}cX#*u#IcDQZ#e&G#w-ut_Gf8}=b&Drh3oQ>s}bKW=X#_`*_F)P2a z%h~?Iup4i-$FduD*_B;b=C16=ub_7iJb2MT=e*r8cel&JTi%=Vw)3`SD`))bAVGhSTZhPCZ zu)O;l@0Yjn+sba|ax2Gj3-6YNTi#ijw?eVo3!#0q+4g1K zek{Ss#_`Emq+%h&;>g71UhHhqcx>|RR+fEDuhI3jO)d8ok4#*SO<-TnH_PA7#_@*R zzF}@;dYOtW+nHG3vF$}49J{WMvu*5SVGgs!WAVlA{oUJ=ue`(Km66oR1-i6A059oe?`364XKI7Sq`E58Viz^1BdIHx&vYV z?_%Tr4}U+N`#(g&gkt;p=69v{zZE2WG#r~ZXpntCH84E2={9m)^p7(c<}212xknKX9EE0nem6eE3Lb*K4}IMgXa3Vz+-<_Vm;Cf zRg8Z&oM%<$__dIU4=w(u5|rO8%VxshQ(;ohj4u@SpAC^;ru16B7W9m3F7>Y*&a2j1 zsFT)w6(Aa(DP&W2;driD4}s5RlwS*p62Rj>)4`A8hxFwe%P6uml_)MZA8GFYROWK{ zD`!fYD=7a9XLg&VCJO%Fn4x(5zY0+jy!}rF*uK_3oiuyV^1o2{zSc-RMd@$+Fwidq z@@@W&toXIo4+Fjb%YgE08^15Z(Z=%mS~HJLZ3Fp8^Q!s3l;D@mZ-eOQf6A=tN1EAx zB;eopaqxd5C=>*aZ7&1{+2#*{`+xYW5hMs}^PwzS_-I%*f8|0!UM}Qr3s{)&|2D?V z&~gsp(jrFn8KUYUYjQaMXE|jBFGr|7YN~5oV2n6pghNw+z!OnaA+@Kz%77m#Lnmqg z43d25pXeH}Khdl|(Ic^s=g%5qU(Sj8ylU&rJW;VftEPM|{zRep80ZiG0jlM5XbZnX zTlgK%iSGCw`t&*U_$LbFS(Jku`rDx}{)f-!b3We3HI!z?J`4Uvsk@A0Pt_x_5QE!k?ZDG_`|7o`!15}^_sD=h9TErtJ zCBtw8>LzNsaP@eSUBQlKa zR?sv^12k(oh;?OAHB!|})K`xf5+byHE>5&}qPUybviwyMKmtS z94-RsZ6}BnmzzqLnwrj^Uiw6_p;aq&ee(J9Lr)70mdxplDHD$qVy^0@?CXrB@k~w9 zNT)@*yDGL%%&3P-x)USf=0{S;KY)3J-UqOS&pDjb#(f(vxzfyBi>qOg%QFZtR{pIO+bJuOtwz81|P|VOKk1 z#DuJDYF0@O+1j)ShlQk(Czd(Vfglwst;BQ>@Nnq|J+6-z7$0K5@t!=LMPY7GQrr#; zscwpAdm`Q8fQZGmG-(DoU&(l>s;KOV8C4+?Pf4DV0P%uFN0=a@6-G}-m=d6@IaR07 zgxwtEf&^h@`Kb5r5E7FhAFoxbPz8xe305f-0m23lX42SDVnfLnF)=+cP`(6_1d)(n z)df}q%*<7dBQ=hE88YOnRCPyRP|zF|IV$qQsw&CBmms1QM$EyN5zJI^nbpi{M)*>- zHA9)91U(CNgsgQ{b=8vIudj2jb3+?^1rgVzS9KeFnQ7Deb?CPhgoCxBLw;L94?`6p z%9>NP5Hst)`lV{BrYiLFyxvg)t3 zRGFBHvz|@Wok6@dKe`|NwH}$Gj#NL6I3%7P^(cZA)RD&xEms2Bj&ejf@iQUZHfe>V~!#ZDXJbiHkF4I)zVQICFMkQd^$3v zxw7XdQkl83e@?Chdi8I|5}bONxe~~vk#N*8%RP`yeN#R9cq)qE0SQMPwVc`iU&yFb zOLZq#_W#!zJrD>~#VIpa0@>7GRVJq5ELZk7+x(Pjk0M`q4B5e@Y-$S|DX*wCQBk0h 
zP)ov|lIRHI!n0@BR&fHI_zt*2p{oK#zel*W9#^X>R zime{hovM4=7#W1@@WUCX_+7vzK;Jd%8IU^O8Km?>nWpoO68Wq3sBDn&uZGTkIhJOiS>&klLJqh$#j`5L# zAI1DgzZ@>cQb+`&sFCKU^84Yeqe74TzZHyMBOxDY?gI2vf~B4x>GxW5mm>e!FsRE} zbN&8?QOAeieqCjr4S(94eRM3FdKPT|he%Duc!MK+Uu(^@E5I)^{uU7wMeq=rzYK{3 zjZ+PlHs(kA)pi(XNJKGsS~&mK&oq_fe_@dS4m+PJfR8j=6=7ltmPqqg`F+wnWC!`n z8$t~Q)@#iT&<}<1%jVbm;p{NZZi46pg0J=aHuIPm9*SUUWY!t`l|-f44MH0arewd{ z5#a4e*(c4m1@zqbAY`}s!@0Bp_`{h;BmN%}0co5Pn16^Qi{Xbq0nqs2tcgggs`!8S z+wsy<>dczB)n!xDh;(%HXxd!fu+_e)3cm{r2go=?!WN_FhrcWRez^pEzqA20&Rl|w zI5xEs5fsO_g7rfs_h}9gep0aX+UoyFLU^0sHPfbmKb&iC&kuzG9Dd*4zHi%h7;anF zeZghE-~P5BNXdEP3jjKrut4VIss^VQWtIzU6|^^WHJ;QwwJR8FxWK&uh6^SrfDJVf zd=f3(1aUQ>Ve4N1Y-CKC?9mjItF>hb) z13Q4;#2Tdw(#R1w;K0Pdfb=AS7Zdoh>}*JBARcLej+X)Y18j`!h$7b0gd~cf76-ru zM+pe=`EmtAGU%!TWwsfREXM*C1a&4~G>{cT2K+!u|!htj>$ifms8gK!H+lm#Q7-ORXmEi_W7^PSNN*WJ>z(5EB zOyE?;iU%)NUVvZ$Aiz8*!sa~(aLfmi4a8q z2F5@bGKAxWCrd|DV30J>=%9nu84ZBN5JrhC8Q`1h_#eNV!|xGxUG{}x*mYm<3yx4u zR8Tv%%~|5H^x^^#sESiN7T!PD-Lh=U&s zy2xQ&)e5HQ#&lyYoUE%_ff5~-4$H-mbyZ)qSwUt6nVy+-RbQN0k+mYL2S$tvGb+sV zrmU;_;szB_R7B|=Sy%N1lpiDFA0yDLtNH@T&(_bD1Yy=y^*NCvlOrR+m338pWpYe% zOe9bO<}+rDnk+uZAQ-C!KFFk#v2`rSYh6{}krivjYJpsHs?ZYK?YRmvi>&C_itD** zGbY9(Mh1qbNsp&V1YFit6}oaG>yc$4Z%%GbBCuLlRmiH$PMI@G>_FLp@-^m6dS=T@ zIwhULf`uwb1i@G>SaB#4JSEmurGN?Q1a-m_v#u%yLTpJ}GU!>TysmXsCqTls1ZP{4 zX3eP+g!O(M$}tn9ue_O{hoSCdSXXs|EjwMUNl*@xApNy!GAk~J$!VnNFWKES=aX1H%V>&D{E3(X}h+@Rg7MUCqpD{kj zb!^2li>N(UVmwXAZcfVVKsl36fuW*EpiXQ_+mb74j+t4_VFFZ7BdhL5hCobkV@rs~j)h9tsxP?6Lr z)hu7g_J328VTT#6s5&*LzN)8y8q7*YX(e`pfPSJ_{9a*1;+EGFkrdP5VA+~nZ9t5Q z6Gly6w&m^SF1vf(Rz;Hk*YVnPz~h+XNEPMOBS(=cs(5^ok}?K)dh#*%h;k}YMM=## z`gkgefJDMk2MZwG7Tq}ZI0Wfexs zE0*#)VTjk7AfgpU)S4h=tg@n_L`4bW#fk-}Bvdj;)*7fKP)mXgArlrvw8D4_15&1k zh}@|3jtyuDQ%S@SkqiVR3@q~&05BL35(UNaSdxc-T>@hgRwf~;EohR0{{+W)d%pr4S2bh#1QHov{yj9~=NQ zGJp*ald9bv$O0Y!qwN3_a)3+|mMXFVP{3FK7(ou>>7yeU0EZM!hvNAXa z;G%y)+Rirs==1?RreVXrXp9ynX$`<0`>Ig8IWz$bz11?HKKCz*mNJ1_|Cb7%Dft1Y zAK>;KkRas%B`zuIINSvVK%m!x`SGWNz 
z-+9A*qZ3C=*&YC5Yy&FoEjeR=oKDDsR53tPQZ@jK9)NfRtDXe|0A&AwAaP*! zcR-h*Ee51FH2@6_hDY*!Jov$0Kl#`sV+db`^oZp+P`O#B9wfB#4uClx;vP`YPZ=b3 z!SIPT5G~Aemw>S`=HWnxDL8&YwMkZIAudgh2@K(6!LkrRxLPPh5#;$GsEHz|8F*kb zU{wsU)sfdeDlZWvnuBeqM}>U5@91Ira5Tl5$Cf%kV8R(pFtZ?lVCy{0?CSLcEbIS{ ztk1py#jR)(nZ~CJX2pq^iIa!H(@WV)>lO!lKB)2OUfyezrdQ%4-{f88VmB+?z$Rp&HzGWU|o4&cVhFS5{4m3TN=5M>lV%o$mL-)fe7$~>M z8S=U`v`QE*F5(PPFKd0R#DE%T(ZA z6(uClIK@5@Fk!O1yXWBiP3T8>2#%`F(-)35`I67(3j3}@HQ-#lP6OwClmka-m|@Vo z*oR+GuBbAxtL*3`7W26|I@Fvnb&A<@!64 zgH#;yUhbh9HiUe8(nDvUncfnOF${KO0cHI%8K&Kd zP#IEGtXV){%%$*ORFvaErh^(`$U;k^@(!H*@6eDxKXQ@~&%88qReWrU=w!^RgfOLG z`Kt++t8|KI^k~>^d^V=o`b#%hq85w_RHen;IdHBu}q&%Q))&5z0WM_SQc7&V_D*|8%Vqpx*A;@ zp8XI81)K!myLa;Kb*pD)y4yt%7Ihfd-msfaSxlT896^6|v

rGP9vGT>>#WXzPKt zUQN#^=Os9Tm5t4Bf<`Do6XfWUyvyRsG2JqY0p5WQ69BB+9eA?(9509`Oj}sK2WH|W zkVJ7P3Fld6`Ogk2!M>RD@E(AH7cb#I;Pp^4Bf#OjQcJJ>9A^htqMP3g835!Z3{0-U zZRt7k5mzdF`z+YbyE+BGfDDI0gEu}&*UKXNl%uA$8%kvTR?))E1P&n15w1zf!-Yo= zEM&-Y5C&rr@v##X2ld}!Z93#`Cqzlmo(p>>K$Dq!6B4Q)3Fb$Q@UUOlQ^c3(Uz$uk z@9L9ccdQDmKZIwuz=RN7J}Fz#3hy}F`9=vu3;@FwdeWt6qJ+RApIQQ<>heXgdf=_B z)##YA+Se{3=f(&wKa){6ho3;O^bssn54;j30BkL`?cCMi6$b$O`GdTHmwr-Je}`x# zqBc~_@g}quWhgK^@JMHfkA0UUeHqN6;r)Qix}PNxJKFBALBPqqeaS18=cK?Y20$P^ zhg`(UWF-r3K|>YL&|s-R0zj?I9U^}d06zd-(4YL#k3W8&)J0br)GteVlqH6yM>|iG$#cCr=DeQhT{ZH;`e&5 zH=7f!HS7C|6>FBtl&lJsDZQyRA+~*tiC=(gpj^sSV%-amt|b_;vW$>fdWN%r+s4SI zEHaNEQ*iETV!mH8GhqdksazidDGRUp_553&2~8Ky)M7k=Gvc_v+)u0!s{)#TU0?(7 zt}EgN8A5H8nJ}9RLPX`4?sX&{+J%7oFWt>+BIma!6b9@i_5L)#rI@cqnefF;oxEzI6wlVgR>jvgs!lU7b(#HijfaW0d@MlpDywqC*-th7@R5;W>txHaSNkFos1%eS^rDFh zuAPEG6SUIH3yOy`F9?Jv0W|-HSEJZY#^$wM7DM;*-F6{cwp&4~K)7c7?9_CX!OM%< zvFB!Ds7#A4ftP8aN6?{g8Cq#aTqGU~6s_q`?_{ylpkPHMafv!_A(iYgyDZCuSTl9o zOGm`n*wmzI2-8bPMBN+9g z5ie%v5Egw+8PE2MMoM0kd>9KKox&z3q4sJry1{{0LD9p4^6+T<7iY`Ez!-NtI^lkN zbb_MNB~>cb%RxG9K4IpAQC8OuZk z{PH41&5hR(S9{?Ua0B=8nR9U|PY}_#59Ty!yQn}_BkiNK8`}IwE_izHNtp$uwd|4=$ zOfet2wp0KpC0olsiDBd%$#>Lmq84W^FzI+<3iy^2pju!NsgC{>56~ z;NFp;d*1{5*8ay9Z+jl(!0|vpec}HR`~!Coap3nI1HvW%DH-mi1Nb+>4G5#EhX7N| z?o_emqU;Sfz!Wn8f&UJWBh(o{`Slnu@0tyWw*!n~9nt{8Mh=LwtcQ;R<;0DV{mP&orJXTEcO04sv-(QkmPe}EbHIslDXeKH6@q`4oAG@IXmRapnn1#>`t%}rcA zh;i;R1LPZkz5on9hBAcTqfEXT*F#{@H~@n_*}13wa#KCdxH{HZlXjuqg)uke8{$M< z)IEY&!BwiB`oIlp4B(B({ad#ZIAJ;VuuJW~A&bXC+4PMputXqI6Qg~^&U1T%6&Jxxre}oTyc0%QcL81^XdsJl$bVT%L=Z|ePw}(X3gk>_tG|Jccw%Ls`%Y+qxDDcH; z!#x#IMKJWtkuj)F#leTV_*%w3Q3l+=B2IN>NJS4pdSF&vSj}C>VPGm^QGyzIi#8FP z!zFvCPl0#C6#FtQrC0FsFi}GBgoC0Fl90-GH08|6mvxjgXqgZy+) zBP07LZRc+$1k}~OHQ{1c;tR(JQ>g)qF&f;s9V?Thf`vAf1PV5w{B#<9VuHm!}IeD@;wm2LNyfwV%BV&=_be?_<>Lko%_d zCwL>Q5FDd|In$GAoL0|Dk)c?e;{` z1F*;G!AZHyMgNAn%C`Iigut_y8i{ z6P1PBU-j}i0;|Nuz5w;b8CKO`LoY-5os?z8bHhMA@-hQfU>WEml7MyGu+O&NiLqG@ 
zKu01A$n*fq*8yjPJD_V`y}NjgBD@$;-u|jTK(ts(0Oq}+!o4i}0T<~ObOFFNz!?Ba zgo0;T>B=_%*eriHacCyu==UVmJ&VRFm=fax-i63l4yPN@E8p(A5u%9|Wnt zkm#z`hZ;gEU8ZG@BlITy1q|@$^JLvbW$F6B>87xv6p49EN_3|m0CtE#-s-Xaus%KU zgH{g_kPHH$b*vS&AEWM61oWGoH2?!zH)rw*U&oD-7bFDPTup-rLz zA(XPL)|E0GJ^{M=q|}H+O6?dys9NMF`;!^K>;^;E(2`d&;%eb=p?8t9-$x>hIjuFF zI|K7TMiy8jZN}Sm1t6*JYcN=2td(ip$Eca6!>pP6^+4W)`s3hNPti*FIRL+pVtEsq zA`tVzO@I&-0wjVoav7DiBw*upMN6Bv^-=K%&TYb3mPw#9}`!b3p096pLa4Jh$u z0}dJkT%lzK#MJ>n!2s330mA(n(t*DL_M<~2Jt1A9ReVdd2sK!g3PEo{})E7H*GagU)rr(T(*gwpXKE%1ciq!xCRr!Ghz z-fLHr$)Oc7qugK{&ol$?{uoYuP4 zxFaeT_=suZaJmsHl@bpi8$=qY%SFvgkxfAZMVZ*R=t~1pks644)Idh92N2W5Z%%(NgiWVYSR`Gkr0n)G^}Z4l$9ivHve6Q?fAzRqxZQ?*nD%8+o11t9m%Ft>J*xk zfd9|q*A7;kr5bHm&`i5 zmhbZzDB?~^W|_}=>7CRN@_H3BGBHK}oy8c^NP46uZ6UjoRHCSn^cdvuk(#uH>`GRd zz>rcS*6D5+(|1z)x|udIj^j9v<2a7vIF92uj^j9v<2a64;@F9Y1ua9Zdvb!busU{z zTKDL%H*nNwwgx&ju(umL>;QwUZtlQoK`+VV7~YKNo*O$iTHna2k)w@q?K(5EbUsLM=)vTK zaC#10@2qiYL;DFPP=zKns)DWDB7KC8Tk=INMv+`vqop1M6}k#-80d?9*ZtP z!9qg9@~XG^NB4shFtEA>oP82wWVsPpdpeVq>8LuBuogL5dwuS zWTgF;i_bbGy2*(*oPiip+6|E@-Q85M)*T4H1 zRTV#|f}(v8RQw=_C;4Sn#Ia(F;s_OSgaq&<fm8f!$kN^Xy@ zO}zzB9l^FWd=7GOceezG;2zxF-Q6uX0S@l&gy8P(axpz zBQ;fBH9dQG*ItY2r9l6)9Wk2l;!76@PWd{Dj==L3_avI3q^xM6Aes~KJD~Q8aRJ>R zEs9x=66Vb}mKv{!hT<&xW3<>pL?MWQv@}^}FhSTMX2`b41!xt5@cs4u7}o&1y%XUN zmfv4^F(r5lc=C9r%vrTTl$MqiqlZlY(`Eg5 zD*1mtD0)bzQ>2X5tv%}H))Ap7@kX+-ofsEZNB|8Y_H#DqmkGQ59h#DIp)F+8yZZVbc%t5TCIK(c|5<;yBt+|@%$PCmdP408T(nIMr1(j{$9FM{n8`t}v zq{rdy?CmYHvMNa@Y~gV828#quGr$N~?3Vx~n6j==Y<*)`vhgGfgfOxxcu{g7E*GjH zLy`7Y03MbA8{r6vFqDjaa9f*93jhugCbU0Ohqj1df{Q~e z-`Sr68!=EcpJqBC6!;KA=~(yGwrZkH1%sU-xdkAOTD9iJE~n4Ozr zU~4>jTR}mY2Pv3EIjz^?K|@&RWK)kkw!NnwNBSRW*Y4~$U-_lckK@lm@36d5 zj$)2s&OcJME6@v^P z5LG*iDh}*ji5rgj#SK|nEYX%7^=6OZ-eb)&$BBD~;$c#zT?ys!W`O9|(w9@->5;Lq zwl>LZD#fzYOuLq(?>oE(Imrs6W1KI~o*Cqe#~q55mGjMxb+4Yw!>Gr96NvEt8Pu%k z{oPP)_sRV;mg`?IqHY@!h!BE7TkGJi!co+MWqxefP$Nwz%`&jNyy|#@gr$|sa3nt>p`5&Xk#bx$W zUa2w3YytM1SXTf_9t=|xj^e-rG7=r6h({2#j8GhP1APInmLS{6(>@8GQY 
z(1iUT+;ccslX^HdInGWC@z{RI^$f5Va<0a2B$|nxk_fQ#@w9eG(ao*UgiAV=b%Q*i zC`!Tv-az{u6_ZKLa40MYt;8{pV6=ts;enN@87%<`i|8qExDEOUPK57bT9AV@77cXR z!Ve94yOBP)E!sAZHKe~D-wZ;-Q>%h}qIw+>NW?x3r^Qwo!^jq6-618R2zyCyWc@{^ zP!3GdfK~~WMiCDMdJSM~cOcfpgtbs~6?ee1^Gx2v)Jb58i?CSWLJ30<7)#XEz1-{Q zAmN1=zcb21kkF~v@~Fy0QP`Myy0^lnq4Bdb+=X3v3!rJlgyAtL!vSc{sK#Q`hj2(3 zaU3ifP_b{zpCk0N9yRoN(S}&&-CDlrsJM3pN8f2|5Y3yG@H4qM^SkJPSdxXM!vw82V5D_eHATx z3k&&r9{M;Rih{Jv6c^`Wwn(-}+i9QNVMZs}47QphPyK&sEEe6a*piqFuJTkSNi5@# zet#3eCc}e2*;$CEq4ngMK4YY6=bJj%p|K&}KVw;>GBoB{cz~X zV0nL;JhpAuWia~pF4rMZtM|!{*vQ5w_g{$rbL6VLW#XjZTjjWU>@Jb^Ka#MRQp$qQ z7nV1h>w`O{zRTv_J48Hh{4w)-VspGkKiMUwdj|xLvo0*(UP{E(qCUn!qNl9(=#!7? zt%}i2SsP;<8y$OOKaMv7drSzyJ==iKSRIBX@?q#zLQR@p$gKlc&_zj{!r>g}=7ZmV z(cSL-VYZ@$KJ?IApQ7@{thXhMzJx`3DzHc!SWpAyp6Lr|MH`q<(0&YQj!HmfP3RQoz3A8MYC^qII%YEVllIE!yC zjk3<35b?8rL?HPe5r~M%bj}UCL!@zL+cWy0lYWocUms^`)@jPb1R~R6oy-?x&H#14 zA+@}^8Y*L(#y*fyO>rx@+tY#Xe50NdV|LIl?`~rT4Szbgc8l2goG!_^mF&VDCnt`6 zC8PAQ(L!JcRdTQCXGPt6r@gp~eLQ{LyWfy@q()>aVrhU;y+n{?P)R`q+$i?woHo|E zMDqJfRFUV&Pu)n<)BL-sg!eazvr?nbQtF3nw$A(3-ByQ5Z^?a;d_8qrTcr}DxArM) zxqt7j>S}swt0MUtm05&n#~=JmEAO^wXbfttQ*G`OcA&3{9`tS(j0+3aF zwuR3RqHqMU6-$XDFOO6zBYr+|23Fo^Fbs%gU9V8PZZfyVFJ*oGxaV3VW%;xw^rHWPvZm^|i z(0lhqD#WykoV9>IKl7x%mtd1%!-*{^{wmWwg}35bq_6i84dNHjw`oamp2GHZlhf5E z(%}{dSOpFd1$&#(PrUz>z`j5@_&Q2BtRde$sY4#{99fev#@8;&VDoRHpH_TiA<0vX z?d5yg@IYsIO}#@%t4rv~W`LeEipm~Bw17s+zxQ1V%)9RLaFaUhF+0Q*g5N1wwZE+d zyaku}r-@~6Gf=qQv6Vktd{#_iPb%j=54~UT(GcmXr=jr-Es3F2N~FaPGx;y9!niru z(f{iMnts9unh;hKL4_06)a(ex_QS2TtYGH(d4)V{?sh=&+pP~2`gvJ4Ii4JmVLNi1 z4vMk2b~wP5xqp*-$TKc<{~++2x0ijET7-mo{I1V;M~nN>KF>KKV%j|WF(c%Z5LA1< z9#LbWRQovt!mIC>M@?o=a(^1v%?tAJx&m6|CqoY`%K~@?SQAkCfDyYXf~Fz)W!ib5 z0ljc)iaKL#Z6UA6DO}*rU!d@dbJLgM8IOjCnsmDd#l^me#n_R;JAW03SX!zXWciaF z_66}@kXE2^f!r|pXuWyY+iZvXVq_Jf`PLvZ^MH*P(m=HvjFl+hoSYLKr#!Gs`mM+V z04{8<(c8J(pb<>2zkHfL`JJjJ6%!nBlL9BBwIG6Q@kC@FL!`A zxGzVeM?~o1Zna`T@9*ay^BvVjGl~1}v5eI@nx_qst$Pl}=IffQ9G>m#7Po&os+FeK 
zT`$t;rBwBN86=i_YnA^N7<{tBQxf@<)GX2P@x!ExvlMz0|EV6f&3&myqmcjp60OB7 zyMQuUb-i4?*+D_aShpTjO#AjCH&;W^BHO9KC-d@{#{X5Av67%G%ofEGqiOzLZSFxRYI{&F-d#gtxJaSF)NtkY(X8G3@-;3^t zjsJfGbN1g8dS!R0MEq_C-s1erQ(pzxMe=*zjzUvEvj%TQ2*4HBN{!_0{*voMYMOGJ zh#=Jx&q7+B56Z?K>^38UD!g;7oEb6RqUMRWTa;T z2d!76B@1FCQTU80?J&hnvqT(%jv2h$CBQxT%^7tZJ!k0~FFx!c&fE5k@6Fwj7-nXT zr`J+`soiza`KcJ*5GRE?m_2L5;VmGDFfL}I+Vk_x@pSVO65E$AHRegS`}?2oQsn9B zyuHh%kz}bm-`_m%t_!Xc<>@QCx`MLf;xr)%RaIWE{A0Sx30HooW)4(#Nd_{Nd)+C^ zWme^BV^%r@DzmPcuAH)5Dm4n5&EBpK@6E^V*`%eztC{Jax|0lLYKydHPDkjEG?Yiu zEdJ~XVE0_S{12Q6(V9YkIXiNqxF#%p%yWG9M>(HQY6_lNLpH0vx2`q+93vM&PPi;^ zr0(A~mvpsd%9wy?thM@@>xP6;hgo|gmtL)T=hMjvb;jw4I>_X7c2ufU6=hm&zoroi4e#!I)Wc{Cs}&#Iu%`Rx{Gk8972&Mfg!g zJro?CK4V=)+0kM8>`Bm=PhplmjxKfom2825k(S0haoR$@17`@jeAzW^uHkcuxW)EI zXe%7>+*e^de^Mjkk!+qe+&H_Spjz)SIOOieVctdfn|_8bi_+p}-4%-=%Jv8(sV(ka znRwXczqnDx|3ByIK3{z|19n03Ba0FB`%&-40Gxypo>Jr^X*wch&uX{G6lh#f6zC`& zl{_USA=YfXIwB=mA7q{DDBl%9@(q^acK4vI-fqfs(e5^r`uJo>NQtU{dIjk|ENy)( zVEvjvj-~Ljl!fneB~E?Fis-}xcDsL67fPj{tthS}=||TDcYE3^tVIavI(=3)ZVsc2 zYH!W%aU)N=9dVOO>db6{V9-Rt*DHp$BjofaN6k&vP*vWE+mbVoqnAM?gZK!ygHG!A zDFqJ*c-SY|#l04s+Wo?Px_6ia-${NQ``c%JB%tO3v87BS0pSEu@mGlw=bQU%$>pOd zL#Lbj%*lnLiNz2OC_&-&Ph@;aI1fSKw;LQAE*B^w9A-4mDcrQ5c&d$%N`U>T-1B%y^7 zE)?)PP-3PZtw=X50QUQT&^yc7=#9&6o%fZ9qN9ym;9zn6Wr{L|_rAfJ;v)OJF=g-b z*xkQ^z=n|SE4nr)O&xW=&V$Uevh*ca^<~6dyToas>6MpI&2$yz78_pyDPkuQ=0_E& zNXh<~Vr>;>kxIn9U&;8EB20u21e+)aiG_Se0q9iLtgh0y8mOaR4Ljpe4Xy47^u1T_ z?^2UK+5V6xdh}NKqp5uT>g$R_riah5Rlk*B*DoM}!9IkpIx2d_kC@rt_uIv$j4`pC zz1-r(Ed$Vgf9zKyGef$wP&p_#coQLH8xH3W7Zo#5@t1NZW`2*!4j$(kadt-QOz9`J zH$Uan`M=6traYckQg~#5_hUK&(p`2;0EP$_J?Vwqij(7;mvUZmaTbmIpO9*=Wj~H~ zc}?#}^HP_&aG3}27c$pmfUd8cZ2NVxEID&i>5=sklZFX`y%4_*^W4rxl9@M%R}%MkI>ENW%0x`O6C9i-VXWu83HV>-xNcdDZ7Qu`*@a z&#jDX9L`c;P0Ld@Q3^^FFWQdpzPh$E%T1+O+Pqh z-V<=d51mmb#&^{wX^<?rjyd_G1M!9_9w_->4KeU z&_H@O2G)hy1#=@|r4pi3R>1JK&$&XZEZU9zR7fqt-<`e-^QCmaJQ=Kimm1xq*xJvp zR_FzyEG@#VBI~mLqo3~mLyU;rW6^B(C5ibx^O7vU=-pOP%^wb+if0x!hKufN 
z&+1U*C#JcpM|apPpbWL;ZAQ%1MJh^2d+koazW#PXoqQ(?dd|4uv8#08>e&ix368Tp z;8#WP{22I@N5;gAB*`fo8V|M^3BmZ+##y5S^8wUb_;M(-crTWr=ApD-DMiQS@J(+V zo6~<`6UDv0lZQ$7r*xo*Zs$=uZU`NWKD_Zc=RH}nef3MGkk?+Lcf_u)_b4MXdd&!(8K5Lt1`fJWc-@dtHaIL)oZ4|(+cn$S+Qgecn99zf$9@qr zi3br|ooXo*;D$I_=TL7ofj^I82Y4QBnp|lZGt@Sdn;|t6e*01;Ed=mFmvNEA#o5|U zyrBn&L&--oc`bS)&9q&4YRlHP)zwc>;a?U@PdNK#PvV|_}0E$*9V~HeSUI` zb>IW~0kHbh024TNmMOMr3wZoex=9~@qD1Wm@xI5QQaeSr+@t7tBV+xt2QhR1D|8u` z_-CK?I%XZE*u9Fg`}z0!>j>5M-nX-V!J76bJsIuZP!vdmGWt@DpR*~IJo3`qD`|Hw zFYSIKoYh!;!cWxtW}-1u8!n80t!a(78F(>J3sDCnRj4&G()|j@rpk#-DIVgvEDJ1g z*Km-Ad04RTk*zx!03QPvy$L&DDN+zCz!N8VQ1M1ttyeRf4IKbSGwzBv9td5D2G z@acu$dNW)M?S%P+z{w*V%tu=S3mm&PHe0Cz6~h z8j(Rxh$7c;)rw}59JCg$*TI(ofcm=BtB0X^go3&{8~h+L6$r;N@^TLv%s&I^NdUGL z^HOHskz!AQszZ1KyI6VbP&=?Geg0_K*xKOW0gqs*1$8oL7n3lgU%jAao?8uA{8~1c z_$c5u7V0rv9r7(qaCiMvfKxT@1TT)3Ib^^DU=Kh$auEc;Wng)My@gP>DvoQbt@vo2 zVbMkk^U)wV@xTz+NHry%_bOkKztD^$26=1F3fBlfJ_MU#TK8WnfAjNpwKPCY2&Au< zn){J-oAn4E3s#A4bw6^I--&PkQfp$6X@tD$+y)-ov)^k45=0B8uMaWti#ef5MsmCS zP5-vx>boTSP|Z>+j~P)PzfaSDKCt~Z4-8uXz$l0ywri z#c8LQaEYm|w1Xo_Wf_Okl>I65dYH&gvAJ@2yxQy?1qU^bqcwEP-@8Poue^zr`|XPr zX}6@c5-=;p2ZKge9t;o!owmzw>tXgpcsTon9psNCA;9gl#^+4*>bxI{GjduYY@X#m{|Gns0S+XFcF z0k?H#ip0}Um;T`SRiNzfpJQEV$Ijn>NU#7F1vMi3r)B_}>J|$CnLXd!nmbZ8Sqf_% z2NMG#+fPPa421&uT)Jox!vpq!GxbMp7x*aEy!tb=e3pZJ8KU$a4_!BHR4N_=bkl_N zKf;2R+%eUoGEwP7zsQ>4cN_mb0{$okAj{f0k9oD*^rN-b*e5NO(InGir+JfrB#lhU zK>S2q(xY9(hfrht=k;NGOokH>k)m%%8mfaDCSP%{IFSO(Z$0Ea8<}^hcLmC~o!93nD~C+u9csC)X<~scpB<7Q#y-{=>2U@-y1oVRoryI< zA+an?OdwDh^}MkTL~cYlBw=Nax(NT@GfzX~!9EleP8v`pC&+>qTWKo=BJEL%&xV^HWiP;^}nJ zhE{7CkI>dk_bJpcZ~YUv?lTDw&4l%D9Gz(^{Z9t+{XoCYz=d+?V=5aHSH*0;T4YG- zpD<}J$=_3U@NciRQ2y<9nE>XBY`x5E9=R%I+O5|GhuKJF4#`)hEOc;gG-F8K80ndZ z-UdNLSbl7Cs;nK!XKG8zv@l;BXsB#b>o&i03vK?PRDcO6cNgA((E5$5hWQ#U8fPYK zg|=V^I!gao9IWf%4tIr;(3r6gNc_z~2i73qDuJd~8Z6dj1hGsqmT}mLWCg)JI+znE zp$&q-2t-H_ug_AOW}J9hy1HABkr|uYy`&RuB&LLpA6Gj?s^zi|=s z1fEVsxrC5_ZxuJ{N*erGRV(?2<)iLf%aeSfbQA>CD0qZoF0hNb3(2x 
z!EvcOM4Wx7tNS%$?pWetUmj9G+Or1y1VWoT7nOd7Vi#>+%I?tQfL>qxym4yHrau|3~UV~KV1?@L*#L-%fP9*+m-^j#u%SfJhQ zJC3b^0)t%u$qC?7b2%5uDBUu*TPCMU*ghT=J?$(Nt!7u~ue>^z^+t)VQk-O6c~1B+ z*Q6=@gKR1N{b*;+qGE5=BoBt2l1&Fm>9rjcyPbciolnjxYh(NVcAH!d&Hy59Wl6@O>u44G3p5 zT+I~P=9s^Z5OiZubqT* za0iV}G80h9!|#z1hq_1gD8%uGSb@C4FxMEFm7Y`=hAAsFwf-slrE7KkQ;Yl~GVQsw zE%Q2xtUILDSI}8>c20fQk;GsRT)9)OrUgxHlh6L>G|%Gw{s1eifECbUB7xNV>ha0i z@zK<|b!@bn>aawI&=s{XgpBeFlsz3=W6l;BdYz8j8%z9v+ zJ<8V`vjo>ZL6V7z)xY(M&T{z(zc;DGP{ZT5$7yXELRy}bZ|o*)M24zU-WT2*Ud9M* z0!7`aL~Ng`kAR(-uSSHCVZSyXYAM7EaIMO2xUzkC{M8Z?&kMq5Y$;~?#11ZjP33x@ z8l6uV{(utH36(=lA=rG}5Y~R6TgiLw9obur$N>x~`N0(c(QK7HTM|D- z4z+^N32-ZIi`oOsrezUEBir)N(JP+D3I;kaIvV`_DBGL|PPGI{Ga9?@sXRg( zP6g)yf3pshZ1ZFxnN!w4lZ)Ra_14gbN0jcnT0$Qs3;b~bcd_gPx7I)`F0mKv;!8`q zixh2o?tsaL29UDjd@JvM9=dZ_a-_mqlGrk^tWZZ_ZA$xV< z1WSO|z0i(pU)c}^s#V)TnozM}pXObXPOJt6GPt*qBQB1O8;Kr%s*^#XQkcDI zRUv=;R=znBU8Hm@#RolPa|NR)ea-uN|92V-R+>Y?@^2%P;>vzh;a!Hvb)F^eYzc^B zXexkOGo69C5H}`yxr=O5Jf)d?-vJbLp$#p4m*B&>r~=e%(`?$(YM23}|}k zWWCOo{Ey~)uc?F3l83UoK$9+km!hm=vH>ac`UiKbG!4bl{vR)~ejGAW74`QBn)CM_ z7Xkg`A)&apRTA)Vj>u?e7q5F&A3C<0ldFZe#21W4Mg>A*6uy`SMF{*$$Uk$L;GW#3 zS=~$5X*Oi#?H|tdBVK;Xvt&Up{g?>tsLQswAGBLZsy1YDy4|nNWUBI6v={-RI8ku% zb&+`|6zV_vh0PBRa{0ZzrRzv)7K?~DO<3mtDSYTmbE+U}T|kR%jdZ6o?COf~$2KoS z(AWceBDG=?epmC&%AlTbZt(vJ)2dzeFrE+k9qfk1m{y;@ck73c*7zF8aa=+6OLWkJ zF@#iHf$V9Fx(bgx(ej zP?q>em;7boN7q^B;5>{^+BaSa-_N*HVL~*>Mhxwf_s4BLo4Ugs5t_r{)N6xvM>ta! 
zst@mY#(enfF}u<3jJ9_)a5SF!qHVTQ7pv#_Kw;6=Y$mIpxZ3+~`|pr`BU9?Q7U8Xz zIF#WH1E~H?A7OffjTI*NM||mym^C&%BQ~=kCmMNvaD-l2tOl7XLzmQ)>YbS}tK_u+ zlkDP_3%hk)D`AD(&>xM&E69*m!mU*L?o zRicv?hvWrtW8uVQ|HjBUBxrp6GL={th^JvvLQ%ey{>$p;b^FRaI(%|tAvL@HR~Qsp z6!xl!{yJ{#>W7HE-Nwj&jFwi7ciE$XjVmx;=>2xZtS0P-HtWgR+E4aije~ymtohSC zAN^iQ7ImIsGJd~WN>7CQLgJ)^9FO9S)gqU#M4$vbLqag}eu7HnfQvLX+6$?MC#4MO z4Xrto*y71hE~$qeZ3-(4?H0HX^*u&~n}FG);kf^d?}C8DwP5@y=}PJ+=(}V?{$9^T zY3!$ZKSz$)LM0T^GbAdOi$B}RUZ0@MamlWzOVZHN$RdTEsmyd||0FBaR-xqC=v<0s zBhN-4$X*)kV|<(X>M6X(<|v3}>y@FVa`s1)Ss`zN$ZU6sdI!sKkhLIe36G$lOBz>1 z@FgB@ut!=!oz!MJWk{yiY9xIHQ76Rj*ovUyP5O|u;Sk)bQvb!&6sd2vhCDG)P=IF1 z-!v+g-~!(eBf19xX&kY)xGSug31_&5m-%i=N1`Kz>mYhy>ZfNQPb*0At*i;z8)%nn zQ+HXVDnxGp-7{qwI1rS5diNZJ`ZZ5sW>X;OOIr^)6><@@LS~e;3N5L&MA#?k^$>1M z(l5xmTeb1#k5O^v@uRvxW9gYe`uD^Ts-zh1r0o0m>dOsjw$e0Bjtn!<{E&o}iA^#3 zlYALkp-U=s3GxfM+_}5(4QHT_XX6(BmlY8av$(*=lNAc z1_H#`fKjp-hxaYl*Ma2IV5QhnR}dxwTlvQXU@%I~_{p+`(DwZ9K5>EJ(;9XO>2c(84^! z2kO~_`|zL+L0u@9zfTDWG9`yi{`nBKVp$Jym4|*`{GmR$gfFTnAk1BOGix% z`t@oqk67MC0+sT-F>k$3Pz4Jks*!_&+Zy$0%Gxzj2Nz^Q|!u1MhC6Io`W2P#hrU7=5wpVP?(M&YXu`r@+Oy`gKS&; zT5B8RL-}uXufyfWBI{HxpVV|u=rF!|Z^N!SO<~;k==+?}=fmxwU$TdcO#?WMJ)lcx@IC5;0F3EoRl+WN@J+k8dJ_?YP z=~J_RiieFZ&&zd4dY%;J9UP9oND%1Kf7O`EkhOPYv)T;LPilPUFD{QS{+o9rgt{N| zN_CJZtf=^CTa9DdjC;NFr6kO=nOH^?G24-71Sd_~P?Pb9`f0-AU79`jS6t*BKt2#fW zsrfe;=&~Kxz%J-{55eI~eOU9l2>fQ+AIg8?P^PA}=fdz?kH;y_leCqc^QZmdDBtMT zcd1PG;^fDLNGExd2~Tx=jIrFNE>GrWy*t-(_iBylgnoTnZqTZt!*tq69J~Xqz+f?= zd!>kB9*gp_60!ROpDF)SPLp`6CIfE0)VV=zyBDwiu+mz@V zrgPP6pZf1AJE}WT&LKAnclYXQv-$UDCsCIcEenfB{Ye#H%b)kZtGO@`D|+s!+6DDNkukxkL8*zq&hcg-f_gl(xVN^ryFU*_AGU4t-@St@WDR^Ej=n z;D$rs$E8CKP%u%=8rLY8QxNMYPSD-sZkPhawaPlFY1Gd}HAo#qK8oEOwxFbWUe$-6 z(g=|PZ_9u(wwB`J;-6q;5N(jRVa0{vDMMo55vETtg%gFNpjAMo%IK6J0`VA%5OlE! 
z5pzjP$)qL%=(kFse1#gccrIGdajPC&f=K8ln_E!GGD(nIOT zXbXpa${_&cObHna;%lRB0iny;g<&z{!cc0xgGYII$c5nRvz5z;wUB&QNB~sBfUPLl zNg(>KFE?l-WTX=IE7ZZyQvet99^vr;Yt#}M2fhVoIAzdsNGML!3nZg!L4cg;1%-v6 z3PG_+bU$X8?o!zxyPLo(&IY(@Do0gx_9oWElq8}93xjxbjJ%zr8ryL3mk7Q z*UxD;8#B0CY3vYKZH*2@{*bALQ^{))<1MQq23i6GL``)L(Y#AYuS#Qa{rxDsu`;9W z+(OTQGAz`{ntA9vE#8{(*|#&-SoJDa+~8l(IvG*ex!PdvCrzc`a52!T);JN~+yppW zkT1?O#a(z(Fro zJfi*ULppdYjcJDR--S!VoA~J_f)2AlpHc&d7%G~tOnZYIa!KELBsH+Z`Y=KJJ};~Q zGuK`(4W_+cGp5D<&pp`M-8TFhRe1mL)P?EZ>0ScBm`>GQKq3Pn^$drbpwy~}dw%ag~cFQE8 zw&IqBP8Dz6cLcb!j$;U^_BB+Wzj40ucVpI!6G#Ji1Jb}oq)lx{C|5@`X@SBRnR{`9 zC=Sg4>Vp*g#nQN9p_4jX5 zQGrEa^K^YnrJIm^;OCTETv%|J;&3KXRMP4zx~0pPL9L{N^ESO8fWtS_1yd&0QxU1L zX8{@tOV;WOw8q!f3}~)&ch7k=8`RH=3A3+O4hz+VcV4NEj#t7Hx?SolIXrVs$8 zQH?&6H8(9HNt5_jH@(T!EhrB#s5(AUpg{&@HG^;~Xu%|6%3at0p74TfI1w&5?FC#V z5^b~@tfXKx#izLT*Ux(amMIAe#AL)y)@LZesUeZDN^hOdHBD&*FF%LPfrws+X8~Ic zeXY>$>MU|GkOwttrnNNa&eDXy>7 zs39%ot=r;K`U%SkJhtB*oan0I9>Fq3-yi7rh7Lo?RSbi(ODaV{a`^g5qy<>W38_{Q zRHng7eXOYXk;K%Lk8*t0JT<; z4Oi<36M$8$2D_Q5(&P&m34iIgQO>a zt2|BakEZFlRS9At0=3p2wN@cHJ3ASK)C!~4nrp2zwN_EhG8UPRi4624Y8BN1q?v5| z|0jkQQ;zgviu&)P_tw*=DC=f5zas|tU_cEp zfJQ(K(bSn609%keNrH{&y59BM@$b4x>m2rc@|#I_ely{3Z#VqmhX02*$o+;xHfpUm zH>C>x3q#Z0A9wis5WL+KUVY-=-@1Q1JYB3!czr6fS$Ip)=i?o{cA^DoR;Zxt{%BSdGiK%sYwp5WJQgO8^LF?-HcHPeSUY&csS%+^*3WhNX zbl3CEddKh8H|sbgPTX|$zC2@mFSoDPO-WX{VH{bc48O7{?PbC}doItS#Fa&9*s~}! zJWKHBtXL$P`V_U=M@O&n?Drx4&jz3R&}t`B-=L@x!*XRcQ{SHbK_Y6k&ldizQDe#-i!JNE2fi96v>{Xapgy=-n-&jNwsSe~#h z&yuB>`jh1&s>l+wS`UwBQF6r^%MD~Hreb<}(`xqBgF>Nv$Z`xtl#9o+D3oSVD9@r$ zF6&tzQan=~tD$5mre8L`g14! 
zsMYWZjZGW>AW`R!pHHpUb@^r;=i76T%t;o(h63i|bXo`w5I;cXN<#~4KFxI-ux96+s zn}c8D?}vNq{&(7Z5BJ8u8*cjZ6F>P+%1EtdcT9lLIgz1+&UU%YX`KJUkJx-Veh!)D zIHz%RwhZ=@q7V74_OhnFQy>j7%AAyCrYsXHY(0zqQS`84k)U{{pF~KCqXty8T97z; z@%%d0v*U055o3vpR=Y3T!(%-Qq#=X#><>%y{OMl>fz_yCHBx9qL_|b_BuSDEkp+Yp zV~8QnnRyamkT?v3D2zfNhCv_*AwU=gK@bFCAjS|x2r-5sX9Vd3*TVUd(sD}kt_j_h zg9vHw***sBcdPM&jc$Zl%4iMKZ_nIJ21u~a49(Yx(y!8p*?d{tM1N($WuXQ1RHhEY z1k%lOkijci)v)4l-S-Xn-VJE>GeHtS7VOE<0T|B$VH@QEfY!y3L)sbLNq?9`h6GIC zH}k08#?;`{`z$|xIw<>R0ABU1(H4T@EzcqsXdaJ(gz2GU;8h5w4#77-b|1^(0bHZ* z5X@8TZx4}yZbI1rBKz-vmH;jQ&BKH8fNBfz0+IOulk@-*YKQ{bfTg+^&`c_7sQ^UO zp!+vqdC!19F9VWlg$qkx8g<mm#lbo=TD;F~R~x8`03h=M^v#gC zaTP#L1}&*Sf{}`VFd~9y`dpZNjCCc7Y`-vlQ+T1K#P_l2&6Is)0AfpevRwm6!T^n* zAkiWKawe@-yunUdjozXIgvS}6_W;K432hJXYKWmV0J&-aPZ6^4hwV|wG!*!+s_?fC z=BJfX({UxBQ{#Kh$k7yyEZ+KU4kKMzzb>^7Qy#`ZwnG7>hL||+EcB@b?FxLWc(3mg zWd&UTu!Rn7)*U3ppg>Up(1!_tY>_~wE_kAt1h*wfO*=<+9&t=Yy&~21V)TATcfa9r zKZ#d~fqp{pU-ExZKlzt_!Uyyda892T(+EWuv?sVXJ&DWWl7&E)c<581T`tKaiys~J z-@*KdiC6g8>IO0Z(h@*f08BoB+|S8n0}R3gUbX|=3z3ME0504_NM||#5lALJfWDz~ z0CZ0P47mS*2T&SWrlmdr5Pk@k+Jd)t><#0rLhIY#q5%bV|6Qb~_p{r1+zO5R%VcNP zzCZMNo`=(DKgFBShT00-8*Bm)>@O$`05r%Xh!fC=1rXGHnKeQ=rU3l*YqQX9z`Gbg zBlBylso)z4KnB2?tF$V_yC3R-yg4LD_EuX3V>~2 zT4RZl0X&arAq|*cv_@d@r@OSL)J~>eE4Vd9RA}qL0t!tpj8z*RPFIT~HXtxXcH-6j z{sCO}*d90lQn^~pK|m|1ICM22APLgd-j5k2APz9I_yeE>Ho5uSqFH4OfZVUq_WzC bHl1X#9DZX*GC;s>i-T)kE^Ny-`lTnRPzibX literal 0 HcmV?d00001 diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-last-checkpoint.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..4f6833a5a98858fc08f448525a29a981223d5cb7 GIT binary patch literal 7868 zcmV;t9z)?MwJ-euSe^9%${L+NTOd%Sbpwh;+{jbe6jv+nN7>!~Kc7?WZa_D=OG*gG z3pXX!BZt^sqD5Oo?#PG8@O=FD?wK=v!z&cX>gslYz%t?@Ly|~j_rgx&@94{QB4wZC z6Sx6ZTEbhV$pgXz^#j8fLOK4k{0oAIy}v|*2b`m)9r%4hc-Z?j(xPT!(wa z!k+qE?@Tq%`j~5LYpv>47HJm#9ug7|4W0-{SxJEbL|_JIXla2cga**Cit@pRkAZ}f 
z34&apVkiRC05U=VGNA+_Cv?#P3UZ)_B!FnZCk7fD;HdJ22?!f}_+lKUKsIbmz&SR( z93~jaWsxCf034_Spn}$im8BgQotr6If=)P^9~JoG!Y;@WSOTK>_yRK|1U2xoYeaws zatAX!A%IN=ZjO=#$_+|ch@48GAx8pOggoE|5^T7@fwpB`Tg|sV=JM7ZyW81d+gp2Q z{;#;1YJ=Mu4(DyN7WaQmb#FA9ryPS%!BoW{wSiq!b8R-(=44OORhzBW&UXIxXE+?&&XR65cXn%i66?v&kr~b-DG{8}`yN&2=tx ziIYP33@`T^m*-2nSnkW*Fc%7Y!Co_5zj1k%Kjzlu{ytcVCq&8-{6JyNQ2Hvj)|xME z7l*yH=C5+Q&X?2||~E(OQ=$O>5U?ZM|nUEG7j{YG8vsw2ijj zstgq|yxO)ko2%A*+qO0DJ&gwt_+=r@d(WG(xE1HVJQcV5?hHSOMWUk~ZeJ@`dJqPa2cSJrzE zRKrsWMO$D1f|M>v4fEtL!%M#)kOudFz)$o2c$=irS|8C`V?YKC8Y(cLbqQMQRckGy zwJvpRgxCxiG2SnL)}?Oon3&c*7*^8p)O2cdKUiPOlgjF%w~94Lhu7;!dZ@gz#KeSi8ou9kZ1wARtw69 z7bviKHjl|@WI(Wj2~sxC=425sJ~^;K`+&KH(Ap&fB?ld{?7n-p-s_U9Xs3rM6-;2K zTekJqT5~L29jQXdg;G6%D)~ zWRp)e35Dfiz!d|kwAL*>C6wb*!3z=?wrqQlqKp6!ETo{d_CWh^0%Vrp$w&%&w+~>J zt&P^K(y9d(xa`=5l^%rFtP%?jEZDGnHjmaYXUnx=Ff=HEmLGP`w()Mw#@bZ3ZB^I% zw#f@81J4U9D8Z8oI!yJpcJ-d7uByrl@@Srhb8&1Z68$U>e?h(8P;Pg8UpGpLKF5^n zMzOCOrEabp3E$w42g~pc<#bis)_a%8lyg^iWty3pAriayxR3kqdUI>-tYvL~R=eHW z8L#$jvz6u5Y%s6wx&9K#JLaBt8Zd@t#8B~K#>~@N8VdyH_(iin_h6wI(2SXi4$JWk z8SW{uv)-%St3B6+I({ z^?t&*r^J3_qvtx06~cH2izK4pFBoLea~*zv!XP}uz90NCItv;^GDPS(82IaHySBe1 znTXe!YAah?ZMN34KGw=VAr9IMvAZ?g4ackf&+cru|GK-I%ERr}8_TODz3Xk)bKN%- zs;$cU6{oVS-|gBYc88n2xAo837uU{`-1pCR-m_}W<qbUtH!{j~BO|7sONm6LT-tjsmv**Q zdAuZBySwew*@xRdd%dll@se!oY+e1$wqxI(>u=nda!e_ac+`cn{NvzXlH;Cs9z7Yx zE5ktG?`e8uBbVB8DVt-;G1D9s;u;|G7UF!-B!Fx-RR8-JevER;Xo`~7w| zvo_}QUy*9;V#(@l-I)fW-&$QO6ST;alTnN(Sg!|@=u&^7R8*aoGbr>^UY#N4pa^l zkQL`!s0W-YoGTzHoNu8j94H(pASIk{AsTM3ZmxiYaK43PxPiKXqH*DT3ps_jx`nu+ zP2qeCHH9dKDAJ%f-$6XsT(P-ABf|L((qd!8#>lJ-=Q~IW2r3X%vn!nMAgG#S&CChs zI|xZlP1Fnt=Q{`p&5`D~0}V}7`U1}P3M@0n;}n#csF|oD498;%#E49kOeQJ^&i4u* z$0OAiS5%Y>R*uI243TO@l^fQI+Cj7cW-O7u&H5>upwvM1zMrI$CE~g>lc+6|r`?Z)#euv!sE2 zUpCM3r{;~zo9)^;%O8!y$`K($L&G}DpVH+0Ojx$k5 znW$q_>)4879a&K^JH!-?X3+>!e6dR_MZ0XGBAbRZPqAGb=GvvuGc_%(HH_(6m&)^1 z^7B=iNYOf9C5ykx?ZNpfUFS=mFB0dawF|}pGqhj@)5Zw|C%9nuY@Qu#)(vJDffo}v 
z(5{*sfN+@s22Dd#b%{+&HR-}@t#{g$`Xq3~0!^DtvdJu@mnI+hL6Rm2l7$83O%6nU z?7n-n)-zh`86xe3MVe<$YgWyMbu2)Pduh*G?;1@Cv0}^~ty$$0DlZtfZU;kbh;d24 zj12=~xWEZyuqTJ(Bw__gSny@U5C$h`fR-o^5CH`uAVCSDl7WsY7YjgIvNM9_NecuV zXvu=q6UzflE%0oR1IyJ>tV9>3-3qN=)fv|MX7e^1>ua_eYs0y%TT@$=<_-Y%#j`Kw z*%@AtM;n0B{DColQ+LF4+xm80S?`B-{1`kWw9n2B8wq!zCg&>f3~;_r1TmUBMxQ3m zH+r>G+$qMkaK6zjIz}C1tOm|E8bzn5Q;aR)e4|Tpj5)@b3Y>5BNKP@QSb_)6H~Is| zh+`~ah4cNx1*dq!DK@!cG;fS0j!fMYF>i|e-~yFNrG@kTQzS%_8KOz*3zca@)$^o- zO_SNw^P>s~Dy8`YWh1QkxjlH&`ja@z!ynFtA=5P-O%+P&z+r}Pvr0-5nkSRyNeSoM z0~D&vsu4oG%siR0nI|sBs;Vh9$UHqkHL0rNA@byUGF?wyINv6ORGC#*y0o6m){~PL zR57c(kZ`^`6)T9z3WDmsPcKYlnrrgZ#+xD`s6sSRHG!bgVbfGS%~Uo?2B; ztSYP^D|Sp2D+Fv42uzdGxLq73gmX*sd}-2kzFgT8`j{(3LfS|=fa5%+tXKe%7(!sZsDt3jl;(QOu5QBrp9TA`*ZfdWbv zIr+qEN}z!17yDKN=X)rKjKy_5Nl+XdM^BQQ(^ae0YH10Ebg1+8A|(qlYieXfM3N*) z2jdlhP&gFi5w=?*Q(?SiBx^vKm0Z zLGD0#EEF!SrBtP}&BbEFhL8g6tf>;P!Qy_vz(GMut-w0r&k0J^daC>jX66UW^hcJh zDbG{-<$~Yt(sh2#Z#9YT|JjA>3)rr5J94vb_w;MkL2ebDKZyz1>*DcL%FT)Wc3r&B z733^P`Fq0YlHajy=uq{jc9F(0dzwxIr5{H8lt_z<s>qCOV))Q0+EzeyEuQpY6zi!Xuerz=j$0tc zLKuo~>(HQFk1DbhKEc5Pr*=QQDzYO#i;ch$-vNRTgD7a_rqe|$&MS|EjsxsVQ7N$& zQ1AKf%O}EUc^lJx>6dyRtQ*~JcJOWf5Y2j3c-Qj|fhAOEfZ?5j@DmW1Gu!aCpA_r2 z(b>aW(23O|Ku2q~30G%m$+%Q=b&%Xiydb|zt>Qv}m2!d;Ppi9+OqlQnRJ>y=E%s@! 
zg2A+u@p5ca#a)q`adMKF^RH!bNZJR$UbCU=`!hi!m4{m}&YVnCCk#0u21$fB#yQt; z?Ay{X7)E`u-)M8fka1X=9YBfGen#=OrK|1sOq3I;k3|R238r4l%G}N7u(mrY9 z$z{e4-cj;`7pFlW1kV-D4#a@4?KLP;Qx=GH8 z39gzdSBRBft^qj22EfR&V$+6sD)U;4NK)=Cx~HW$0<114y#MXrwBqGn=5S!se=c|P zAlE=cBPut^t04-|iU(Na6F`ZAyZr-=12-!TxGSKSaSIAGFgNyr@xFg_m-eu&)5a{s zngfL90Q6v|*H4MwJRn{kpnx>*pQ-Eh{x2EM0W&)WT##)v0qBbZH1)qERc`hMG>roX zJp4T%jAjCQ-eKMP0h?*Y26S5k=)wT-ejgy(BnJrKm;shTdv(e;0K41=fKE9KnbG4S z_0-Z?=9(1;u$*HX0KEnKA9jEyewHO1n`f2>AiW{L7z{wdN)ODYgaLwQq3YC%uywpf z3k&qT*6kE^XrC2gQpeq3{yku6Kn1dNR*oTmkQ8xXb>QLckr16ITW6ds=uK7#01cYB ztfmjb_KE+b%th1cd~8RI1OryR}BL3rMBbn0^z$#7)U?hZl;M~l!|4AY$(CBUqVhp z*!=Oxv>ro!A7RuyTM|hN4X9cPKwX{J_6euf1nY7*vJ5Oh8c#t%gQwLjJF~%oNv+cs z3*Z|ZvwC$7N!PeHy6TBdCdfo*iRNPUHJ|;IB{7NN1nI#WOhc%C{!doASDoZG7-qA{ zk_UAoE(42{Pqyqd7;^~-ph)TKXqUk)A>1b7pJ*c((8E{*FE&0rJO!*`qze?%(N%XR z7%+&Ul`Xd*ClKiFs-+#R@!*gYRbD-NQBXmSsK&5W!^6W z@HQ7$C2unVYKv@icY1B0@lB+ynJ7H6hKb{VKTQC12noq**kVIEq3Fj!6hm9HQYmrK zeMDLVhEEi&Tv-f2oz<{3fjZLigYjG1*N(m=qEm0Ymf%?m+*pm_hv z!G-~xlE#s-{aC*hni0d+d~+s9Zrbw`)aF`sHFB7inH)UX>X>C69hQpIoQ+;{5hfHH z%RME(V`4+N6L)@U9L*@2JQtw!Gq;b!3@KaC`56~)bak*Z>RSMnHNdL~Sv12_xvFq~yur_pZg4bIGNS;?#y`#Y=6ni(;)qycLNOAp}p;p7u zu`j9Sq1cDOnnZp99oA*$Sn38FbFI*+=8)j%=EHOEL^Ek}ur$`|*?h|wYYWkoE8hV> z6f;5_Ek}8VdvGs$|GjyK>7K2(=1w(yu;Co3o`=AJ&elV>Vo0SUa6OXyWgi%Vno;+D zX-HU6X1?)FIpePpaY}~3`59jqqKv4Uw;3IUWSA;4_q_czm|qIoEF;GNLYydIfA zl7b%QI)A%`M8-LN`m_E2CJZ6_0bX{8FBS-PFXQv zLN~TsVWuSox&W`Uu5lyy0FH+mtn+&1zF}q@1yzTpw?>@@$*%^e6wG49065HuhO)rn z0u~4PNbJiKG%)Z7dcfR$f?{VO{+#As(>HG+vcPPHi*lLV%?b^Jn1FFRXRuF+ zAxk)2L~szOz514enc9fJ9nJpdtI}|zc0E<(@A z^>>mG)ZKVl1a)Wd@0Q&UY=HT-SST>2PRl@)7TI7WAZi}vW;$P5NM_b*hs;MnvcZ8$ z_=f*LQd88#e@m_NDon<2NX6_~)oyJnd1#6f?j_4W$-t$A$Q8cXb;v3MYKhib1T;w@ z`4Wr{!gbAc{S}MHuEh(7qak!!(H?SffD1AL+qP2@jiaf6L zN~L&A?zD7(C-nPGFk_}?OV|N*+vms-p91b|1qNd;mv*w!EtPQI7C`a3Yef;HhuAt) z&!Adpu9f|qxW&bURYOmLg5st1CF#p~1T|R10FC%|W$q{A?9!p<=;7j+j@{nmDmb7O zJht88Kb2Yu#kmlE5Ma2M9yuLV6ZaQHjR7Md$-tNa?qbGtg1sH00749qna9!K3MzS* 
zH$d;~001#>0SfTdg;f-;=lkpT)Ga$24{9zQpp$5f>-h{8fHD77b9#Ql`|ldP$OC+z zB_)-1Rl#E#w^&tGSG2=!RRRH-fj!3Nc^KrGWiev1&94GNC71ZaK#~o08wd~AkORd} z6_XQ-W(A9Ocm<>~#Z7Zasfqpk-)Vq0RI3RjMR^Y7BgDa2+#p$y5cTxs6%0TdZ^IpW zU5>JvE%y7(U_x58nz0fOfb~i16&v|_aony{cF6Q&<{^_g9^C}v3hk7mGiR zu@O*;f^z}!vDN&_mr5Fz`m1aG$?u}_f- zL>2k_&4*nBWVkLp%%Q}2tAiww=uXzY{beit0KgPANSL${ z#s9;L%8NU^+)rv910keoB#ubQA9Hqw{ZBY4$aMr7TpKdtGJPYF!9;Md2?irb$#Yd* zYQ%@3sgRx*l#rVYRSM>WZF<>a29cLydKirt6cIvrF0}!(U}}_{CL&b_lW(K;y(@!3 zIVt`iwY?Q)f91~)_556*k@)XGSm@$|hesG_texKhClMVpMCh!SQ!^WpvGD1Gv`c96$?b2;e=X5P5)-KHw%ZKyUWywLcK37fOYyk;+iQC>}s= zP&PnE6UG3}NC3g#Kj8r)S7Q;)12|-WtsqTFoD480&RqFA)E*Do)+_^0C;*%An@(T3 z0hZ+e+#6R9;s&4`F2$hGQU;fa8K3}w{{Sl{hxzTq25p&@tpN7#zOGISQ)~b-OnT~Y zHnXc?6|ze=#pVM*q_L#)0N!)2VZb~RRc8S68S2q;$8osMQ*O{tS#G#ED4{fnf<@Xl zSHyre52sf%-mUqWfZj1kCcXhIO@08+^Y{+e-iC*qzyMM8ZzPC&7eq_^8XK@RKqfMX za{ybT$8Z_Y7m#+_fD{EQE`$%jcJx31evGN^Uecjucis%L!tvHA(>&%M(%~F>OU%o$ aO8$oOh<^y)9oSpg%h|x`(M_=eAXIb;EamS2 literal 0 HcmV?d00001 diff --git a/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst b/kernel/tests/data/v2-checkpoints-parquet-with-sidecars.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..0f2a289b1ca50259a1ce78dd226330a7acf4c8e2 GIT binary patch literal 53226 zcmWhzby!qi6TZ6(OLzBD5(3g8wS=@Zf;7@yKf0FgZULoJx&>irX(Xi=kdj71Ns)^#%CM<}9uU%X+CyqyLb9Uj5e6@8 z_aw$hxZ=vJR?{Wp=iqJy^VVzDl1#3*(2#F1=jaC0nCBYLySg5{PxENV8XQlmc7@*w zb?am+t$s6P=J*Urf#g9%^+(IcXjF~ewe)#ZlCbIvW_0bp@GZt!lP)>g+@026yffQX zkX|(k5<7cl-D7r?El^3QN2GUmSr*40_l#|}X8V+HYq1&e0wD{ObzXmWJBnTN$8{r0 z9z#3b*sCjcBGk(4EhMdG!ct3d*V5wQoQ!PHrmV;ivBjNpX%lvRC~IrX7Me{KcXV|o z--`IPg$rV}`4N_(P{_`ly;U5nP=bT@YD6(3hlqWwQnz>zAo7PRO^o_0xE(bjYuO+^ z`f`zms8e!qCEc38u*;VhF8=SkC?Gxrm_GU?%ylk9-_rfEj@=L`G&8%W%*@#|nJ#OJB8B6rF5SbaE^tYgwJ(Z?@_MG0BpyDR zSR?BuLfK|Q`mte5_}p*n7?%z&pR2<`#0|Pr{)e@Ti+R@Ed-sM}cg>#;9TT@UX`+PG zcJy;}hQEY{vpU@sEpjqPq{yaElzZ1kwK}Ok>ewDP`X0)OHD|Xvh^;n!JXa&K`K@gs z#stDo%aFyYZS|4WDVvwY^h(}X<_n8vV|&{KVVq|WSUCNZlXTsqAoNJAc<*C-X_@(b 
zWMWC@kBpchqPPXczBY@EDR_&A?>+5$uD2Z_c!e^}J*W&kep^;pPmcvjCs(hvCgszGxqxvYi6lAT)kaE4ptuLw+BeKi31VaC8e>YAjU)S)Y z)wkx}G`?bO&yjMVXB*SZHm~pfg+8COrbFetCJ}R#3LY6B+pRHbF@0dY79RN_W-WfP zmLke1IVRp$SLoD~#8i)z!bf<4T3yeyIuwHGIRhK^wN>!SGD`HWxvk0i?rJnQlImb~ z7Des2s`2)zGWfAFSoZ}UyH#OCKHse0No($!U6lRr=A`dCd51=MTdv$YYK}MVnbaH< z=EYzq-_Uslm68q5asI)U5;rsw*aiI0mC&rwi@Nrrqd&@IpfKOi9-Kjn;xN@N|2-;YZTxV@L8s+uH4E^E$VI zmi9N8>MZWSGo={s9IuU+jaY02A};ou!--GpTu;UVPY-XpDn3X%X7g$x;t+BFad>fn zIh%HWm=gGCKq-&`+lzSv0lJJ-@{5iGucSFsx<zImr+i)P4=P6+j>n#r86TA->j=o|I*S1!Bf9n@H+31= zJYHb9NJ2WkmRJ9a0z&!I>wlkP`PG4-GO|5QnfH7=Dk`m)hS(Ng-WGU`RPl9KBwHKZ ztkhjOXF2ySblXQQjFcw7a7Dz<%^K8hppxddJ9m$p%f8zZ&sFF(OE;H$W!!E}e?{-3 z3hfXr8N#z!GasJD92~;L*dIxt@rCQkQHfTOdnRhFo=4r(mww;OIdr8amv&v55{=iepyYoupHddZWuaoxG>=H% zwUT6!K%EjH*Pn;xRTC00-G1l@c-awP>c0;->{U2>hfubX|FmwuGhE2}W5 zoD9DnZx_NM6mfAIBB4`78XR`}$U3r?>1=eJEE(*z)0EY}tqqQ9pRy}We#-wQixH2X z^^t+=rA&vLhikNvM$FNFp{)4rACim+;?g(p)+DJru7w*|m6AALM<7i(EkEU~?G>at z;BIURPxLyc{waGOm}{}JeJs_YOH&k)N~4i4pBF~XIh=px@uh8cI90)FJDjANg(6WExg*m zj_AV)1mBV}2>zoiAsrjH<8)Y5O&@T`{h#mzR5QXd~@WLvwKF_t*Z%?Y6-@1`2y;=4(zzrb~_U4 ztK~iAREdkJ&W}vPXNZxJA*HZJAA5hOBJg^Ix+?g~E?3F9`{27zR5$5{b_RB+1et+i z)bI4`z<-=iG=_0XPks7QG>4TnHowq6ph#5w zP7)0E16rf<9&yheDWeWuwWpT$YTr9NoT!1LQ*pr5%e)>`JoF0O#u=pQBZ{I4PL!c= z(TD@=!kVj$90=6RxV%PpKhn$eIXVTqGVBv|dFO29wJ?Xx;f~X4vVo%--wnm*^P$KW ztS`)reL+#_*Dh|_=+#WON~aqXy;ZU=2|Im;m8YY-mH>)nU5)eV;;?@Np7M{tc`_y* z9|zsFG*M*&;i-M>p;)1*GIqsoOkV^)Rm7ju>tW&&9n!hquKX+aIb*v-BJ|1h(0qjT zysJGJGO9^q;0c+}Ys=2}6Ub#c(6%mr4X|Qn`*{eV7(ME-PB*aD+>ZRVUbI>0~KblkWCW{E0{=yubdRL?IW zXCA!XS7^*OD%E%%CB3h7)OzjQ-L~jZGS$@f9@_rsEApsidsX!5%ClfO>nI3wli)0x z!4zLQe=WVQtjh=R(HL1LpsybuqtW>f^dh0j!C$DlIcL*BBmk2yD2MfCc`-+!^@;3; zmPnp9TY;9pP-oAS+A(YYO{24;Z}6gUa3y0=5P8Qobud!7KEnTc z&L|^Uu6^ z@>+KwG6&;~%(wfkI7o0b7CKWjk4J5jgPsDF?MB;6GfS!)$`4lp^Gzd-+ql<34t)uJ zmyzi&@)4k&A2`Q9ewd=KeSx`1vS*?6$gpL9(9^j1n{@;{!;E-QsiQZ{TY>8yMaa)2 zz)akLFF*~$4CloRgK-d9o`6Vz@K>zvZd{}Z1LnMhwD{qgXk-KbU&RD7WA?)dG4;UA 
z4^N~l>5?z@L6-$$h|MbIe0(6jXv()5R#&2P=mMt~cKm8LqhBxU4W_>n#JWL!c${5_FUyp}sIu1|NBiO8{V| zpk{bpTX!z5u3p# z+bNZ>2<2tO0}>KOr80(L-q!^2FoASGyxqibJ9=0WvRIb~1yXKj<7s9=-jRq2yr*es zpxNKYV5v8@DAxn13Uz_6 zn!xK2z2`K%oNFIh%lZ&fKM;ZZG%y@I%y%W~APF=HB$+RAf&2*KjYq9X+?I3~;r?bH z2!*g^>F{QOWM?$&C)^l7^Nv(8>Pu{URbOW9&{!pehlhc*jQKv5Z({h<}=$NIlOk>z5v?yG-HR8`OkxhAJ|G| z!F`*c&NxcAnl+@OFyu%VIPR~uhD4=tFFPTF2u6g5Q-Agl3P~^T8FTy9CoR=H^bd+| z5##^p#y$$yIMhRX(H?2=0%`7%csTCpiG#c4T|4Is0x%%uZWMU<{T{MizR|Jq$CY4z%1YL4L zGrq2gK-V;eF$vDSAWUrc$CV^J;1XN^P>~-1;j-j;RWBjhbA8CsruF0}uu73i`zmvWWBaHwuNckTq4^ zL!rXzQsF2Kj8=3Qm#Bd-(pY8fPr^HVyWKq*1mDjC)o2We8n>Z>jsfVQflGQN>}p`u z1&`Yrg8i#Zj~n6CQ}p}49fasTPXkmxjvZ0;RD8AxgH)T$!MXzRN=2SJTb<$UiNB}g zCF~5d7!X#}s9&=o)P+L*FQdc}%h@wVz-3_UpR)}*O~p+d{0zJfBoy+vWs07%S#J-O z2X4bbeAYC$=3TXSFgUuKf!Tjj_!|Io^92OHGnADZjVBa?mihqo z>Vx7mI0skrlN-DVPZDnN9h9w_+fD`_*#yb3o3#my6U`?2)xF0Wzda5F2(U16h)$VN zgHQ!@R98y7HAKC097_`$M16)|`F*chiqLzvpDf-SEp(RSfdEbo39dkryX}di=tJEZ z5Y5t^P6%at;7bBTiaCbmwD%_(@`O=!1@4fs@A9I4??xV-3}F#;*n*JgBVSbe>Iztd zHL)Xfdr5G=hvbWR0qs2@&hM%Y^nvof_`Y=nyvtN+G|0cJqsDwapU-$hBfeQ3hMv^J z-}u4@C4FEtHVaLQ8m><&C3MXY>~$z~Sj2;5UGXnN?4D;)=p&nlbloe-Zu{qW|rPf~O zqKuizEDm=PI#i1#H_!qMe18UEzx=a;k+7m5cS7Z*bdJS47XCUZYNcSAVG}@eqG3>0 z#X)KWn%rxy<^>jIAFsUgjF!PhKD|E3TmA(q`9mF)qW0C)oOlt7NERQHKGP`4Rf-p* zGG@iEV?~DID>*zburv3}WoOmj{;#v0RQEfgAEQ)N-~+)A7I4ZiJ_RT*xCfIQInv+D zhXcX>kbR7xe}>|WLCF)KS=}P(!>u5D)w`MeH%PUzCJ6Bx>~E1Xp`l*-Qjw--5KaU& z>+&k(>wqEwHz19A@T4b|;S{rmh?a^df^>spWj7J4*%MqAfJ99~iv-g=;&bxhx8jG$ zr^8r|N075hU61f;cp;N`v5xQ*YhGlH=kRj16B~akdQ*E)p@)UC)NDY{jcwtIE-LGS~bhg@Gu_)nd!l-MdP1MNizl%);B5QahcEhrzc zw{ASnui?Cd;k1f=On^^3+hyKdV~2;({tfvmQm#fUWHMZPshNYMq z7_0=A0qfIh?j;mx>3ngXv+>k!4&@c_9+;No)yUqL_2Uq=u*6N@$Zp2Y~6 zwhn0pFEs=^SU$N}-gFujv^soL5qC~0<#kQSTzUJ;d;d6KN5gBq_x6q1X3bOi^1uG2 z6pySbkzrIu3+aVt#kKi(5sDTcz4jP|;7!jW!7+3{!;2=~QJ4A-JGG#YG|BPY9>JsM zIZ!guF0GUBRe5fid~NgI(eUqZkaT8q@Zx^pGIM74Si7FRI>~DfyR`7M6m9(1{*N(* z)`Adb`~fwVeEP?xFz#Cv)TINX5AlqBjWAsOXIhvFO<4i$cn!zKab2(H{Ko~;kB;wy 
zYaN{*vc8l#cKp`aRdl`Z+0iET(=a|`n(aJV(cYKRRu@5CtSNGI4EG|AC1d0yn4 zN&J#d`x!1>)p+zg`u?zz{U>9G)wSb^N6Sxl0ln9qlfN|zI?J_AtP_0St@aT{y7lRe zp@CZ*xJz1Hay+R+(*1iEJf2KD|A?QIKh!epuca2ps1p^4c^;7F{;ZtycSDN~V;3bY z-O)TBk(_8LKs3;ksOz9$@`dK9axyShQGFU;!BjS{T;*%r(ApNN#m3b@=Y?(8w$p5* zrFMbUcB6v8+Y|TNf6f_H)AK(zyeqRQD^IG9#wCl_Q=G((Vld=7Fa}d~U@*8*+Gug%Ivy~R!bNag^%NwKSN#6B^cc!iFMp;jj z?}9j+pK_m_?1~4ME32vyHZs0T&lrrdQ=RosNi^!?KJ9xBrxhzgp(UZmu7$g-h1>q? zL6A8iWs75&R9&=}a(;jA=8^OM&rjKRD<4fRyK4Nfk9RHIB7tH^^6PLoFH6cG!Lz#G z{H}>Nv!($jsl&=2=W>I}JN|U*1ec2Ei6vM zfI`dn8!Yv+PkEk*wd+#$oP`dP+UOgnX4T8Qw)OdkS;)Fzf#n5|o5)OIeYRWmZ7Ro&qVoydr5xLdu} zS+(_KyFTaWEN7{mWOeDhMa=f@&CAdov?NVpS~gdCyUtKg{}ff6By zkP4esGO=t}+*$jbukouqB{|3X<5FW%%W2<%bN|0xL!= z!LxiUwvx|3D#OCQx;?AK_!Umrfx(;(D++NZ3v;JYRzVR}7 zcEx$*QBS>qEovAyF_qbNLUTI(1EnEBlKUy1NjaT5!Dp1_Il2h?cJ8T+^ji2@5h_Ja zhX3?OxQRPK*P^IN&e0aN|A6{?KJ9F>s^=Pmz9mglMlTjO6h3U)7=^g3OS^hqCOud> z%I0dall-)LnR+LOl~C#f=JVWK6Y(+d*)VqSpEK8MNzq6 zqah^?q5~S@1J1V_pT&|X6!yvelqG5G_U?MW@neH`TH|Cggt^|eeS=>0!%)H2^iTTN zD9y6Aw;x7*8$aCr+IwJL`sv!fKl133+x1A9`oY(48(mpQu=K}nEZqxka?)E=>cKxU zmaJ{-&?9RPlkUK9?ns9?f|!M(OL;g;ahHZGt#Pn?>snnVW{SgNE%gj#I1(xAIrYgH zMXQZtB(!#~3Q2F<1W~-~q2g2k1KN%ZuhSavg;ySruKO^t>#Spa(_zw8thVKX zAoXB)2X-umwdNHH^R!P7BrgNXm%)}(%g9lXXm*+-am+Y7Tm3mAv9wgzHf_66JawtV zfFCHT$>@#}00r@ejSTLO{HK^K0d$Jkbe3gl<+`?i6!;RNYTP>)WM=ArH?=zbJB&aj zhsgQy?()}Af`gsiRQAUYf=z)$+gC7BDg1#t^0i-mf7~ecazrKVq5{r=TiE{-U+`LF zCvij!f=P|KYFzE-n1K7P#HpHtr%$!CVwahZ;YWuc>0OUsZ+FMXz?7NTzT09?f#%%9 zdxL_o`L~I=%{C_6oV$xu>minF$(?V^_VT62=WQ}JMp8p1EELml|)ar*gmU%NOoEkiSvA_6V-XBCcGKFJty25V6%E4!q1 zf6E&RQBq@04@#SR)KwulsRR&HDvZ*4FEkTF?6WFX(cYWD_u#y8@8xoJwD@^a`#}|n z>zsCu2FAprQ007{NxlC2skZ9R2j?ee+xzEdd?lZQ$tMf>!lBCef;RYCg{{OTBOr@s zf^4J{JLPbJQ<+2hgg(jd(Q-24PIXcS4RT51+S#Pd!w%_avJpo=Bu~FMZrlugyr?m| zto@VO-9ye?(0JRasn+Qh3HM1qiul6SX*5n>IR!zgL-A)$;h>`+E?LNr-}h(ZhAoS2Bu1lEv^T#ywy zF493w4*)$WSaM8Zp*)>=QEwRPoZ!Ph^|;C4OY_IT5(QY4vJ&Z}ndBUspb2n6gLCmj zQ-)uOAQgBK;qMuEMLP}cprN-A36vf)5~GFxQDny35QZBTOlsS68?l6NZx@@}S{0s4Fr)D2!TQK%Pwu 
z_H3_%j1p5W--W2;lpObZR2ddt>I20rm2Z$45X*=3Uub~@DSnSZ^I7*?+9qt9twP{w zKbYHoH=jf366OY=%kh&-nTB8|M^+8JMDvilISyPuqZ z>4QW_e_&Q1@SimX?)B*|sCF4w^aqSU6%q*7wM}{o!g{YuW z!e5z3hNgEo^8y+T<4|9#7)^9HA1GY?FXA!_50=Oe;|-OE@nT zE03fJkKRV_EiDgFV1Gs89%wX!*ry=CM2|jz3JV{HCgkXTBgr9x>f-!qKa|85O`kRx z>eWaC6{~-T{iLKme;FHodW>NBa;ah014?_37sF2=sx@l_DD*-apiucCU%+WWsp%!D z<}Ei+b|zDujfaYVRtm7=RyBb#l_1y`-PVorG3Aa}v!i5-LEFHst_s zHxjQb1E_3jVLPWl$k0Hmm%f1w9(<6=tLm=^k+)zdWU;sQM&$+@)!qx4>A|QVY2hD( z($cHp#b}0M7xpxa**t%sX#u_RYyt@g$;W^j%u$>)yd)FGsp0@fqkTkt0%hUExn<~F z$bDxG!=cgpFmEMDwLY@O%ve_#UStFj+>b_|X_;{qHs|4&!37U*%R?b?k7(AA`<$6l zu({KwH-r=pHHS^((%}B?Zgd8;|9%CnN@PnT&a&uIp#8^&Ja9u97F$Vbfxer$AWe{a)aBuq#Rid?N!Enn&H@63(ju{YW z&7hD2s2X9<`m2UjVc-+}5jJ4?u?g~S5!9M~hp5~P4;7UmpP~B&32WlDhDB%qauFfvS8V!U*RAOQx}M+7#ND2 zSqCG?xwvS|E$rdQPn_%d=rJFDZ&bZU6ZJ~%wx1e@es`xmFPD2M1)#?8QJc^R7nlSZ znkI&oN)I*P)8 zOv5n%#NZMcoFVY!^A1H$MHDj-W!0Pl+-wIu)uc$P(SohP%5KEfCc?;p*OLlHK-DjV z?>Ig{>;M$rC#FG-7vGFSAq?uf&ZYJ0Kn~&QeIpTQ3!G{}QBKQCfd_O;6duD8s2X$< z#-Udh^>_zBC^|fO;22T105CrO^0DPDbpF@1HF$)EWrAn;O2?sSt&q#!P&rF+N;h+h zymX#(Y$UXIGDVdy%K#~O{m`wx!2l?-C3m|S; z2UPgvgFHJCLraLf&<~#)00<8~y;Gmat_&36Mt{ct|G!?->1&7{Z=tH~}JwMnoaTlt-5s$cG0G8qFH26r4wpra@w9v55j^ z&aI@>7bj%6KK^;4-xCP-WI6#wF+tQxuJy1o9ImGx0ihR}c%t;GLFj;)*#Yki3gDd_ z=NN_aTyg{-IrN9?GLVu5;^DK*@uzr%3~X=)L{{!+JSL~trHO>o%oL=$Kz zAA;K`NRG4uDJ>&1N1kEg66f^+q-Fn4TZY8XfqSuWd;_i@;a?#WoqlqQ*hma?UOq$y z9;O>rVZgV?{{ro1d~nZS2Q|m&eN?S zyd;b7#{XUAas0{+sNMBjhc=)7Hqca0%qJiR$FCWL%mnpCNKj(_-he^`cN3dH&X5ie z_Mi*8o5?@J%0Mxvd7Jo^Y9A#rQSS&-7w(!nVa7iSh0qjr-S`FU!S$hO`L4@j@X-xS{o6a@}W? 
z_p;ceao>$xpNl6MAn!@VR=#PUrGC`U+wxVWsXpV;2y*sw4KCj~v6#`DSvdA4i;!2U zb-8m0VmBLJGVv3Q5&eSk;6WnOASv(&53BgW5o(pu%_!xtz`3wdL(a}Ia)eDM;4e?R zT}Q1GMyldb>vhV27+?^|UEveDm&GAf?UvQGf zqgI@A;#}uSNB2rcaHqmkb~>9zaC$d3Z98VWiFW+?&pOw*@~2BpE-lk2eaq(}W~aIp zKdsO`T;NPh&nu`04{OLIzhhjj-HJ!8#rPFtQ|aP2`|ga*p8O}?o>$KCgccQ|vnz+4z}9$0-cCi$)4UWi%ya81@&s+g*bxqJ+WDS7dw5AjDa zwdTsX8fD3ihUok4dD8l4LVKdWYMSMDF9tt|efFB_^ywbya?b6x6}cvJ$9LV6n6O-_ zr=)J>bCB~aP9*Y^tzD`8OHqwDs!qIj=zYBPGU(1}4bnn8>+9h)ZDsDpc=+Ud8o(+5 zD0KK)$oG;&=Fn?O7M{Y^Lw{OCRFgMrGBgW)J+FR(PX{#AkAyHRArzaXs?nuwGPwvL zweL)p)f)sOx+N3$S_sX1^%m+yxkm~gd`EcWrZA-z2&1>Q`atShbq1udt~w`KMIonebcF z+!!5{=ZU#%S+(Tf%~uP>Ewgg`bL3Sw6HO}`u~}_$ecM-(+8d4{)r5rbP|r0lSZ)%j ztW2w{*@-P&eb>dnRlbx1d(ZzzL}#r@4`1dLqP( zscYeXjnO_yav3cr>hJzL%UcokbGQFl^QyLfV_WQ|#q84ZJ|{BRHFl1Id^*uT4kd^Fx$cQYkz339Y% zdSGZs4mb_?uBn|lY3kpSA`qDXS|*(08!4)d|M9js$R}{lDXE=w=rYOAGj;wc${_j! zC->*gj;5I=ySME||r1kWb7xk#cT?2buN+uZ{2YxC%gu)4=h3{B^1u z0lx}71SjqB#~rh*{O)3Y26%ir`}!^6w#s#1M1wrgyWP2Gq;4*F@BC6wXqxE6=eV-t zo6CmVNQ1mCuy{9|Y3(pJpRsx@Y&`~sIS3`xFXCoqUwG+sGu7 zb_KS;d3|n1JrZ7!?Gi)dupmp={8q#J+eK8*!vgqxbL8o~`sANm0v{fN!*j{>TK-ch zAWqHHzD3OHk7NBl6lK>u4^xcS)6+xAu_H5QGLHZe3YcKu=s zO7oyl71B@_s(PtxWmjC1M1D_4s{nr$G(N_o~imY$v4yNI=@jFpiYZ@$`u z*c?GCZBXs>yC~k;O(JGc=qgT5j@u?OxJ6%gwY6uZBq!*SPj%nY`@nJ$ZFgo0g^Z+r zA!uI_Umg^R)>hWow);VuzS!2cSvl%HjYVvuwktM{x!1RO>lYNwx}Nj(YbSUT0cM!f z`cAQXwpV2WZVZnT-+s5ALE%QWVry92EF*K`C zA}_oIyB(r^w;noZ%`83!od$@%;IcTb!WKGkV(DZ`!1%9C^gWwO7Rw0J`;Da0i>#8@ zX5G$DQSU~Tb#CJa{B@I)ety%$qfXNvQHfBkgedZ0lokE!E2NRe{re||1rsEjat@1W zk{}NzL4?akhT5?ZV?~(52q}pnGBSjia+FXROv(rrO-jN;M#fW!4MxjrNC*ru_3fV^ z=>h5V?nET+SIy?legdC>(zg?ypIu{RvueDKj{Hvpyji4({CPc8TULAi_2&jktPNBf+M?h=8Awr^QGTJ^WMyy@Se zYg^ER0QmkJYIqzuVilvXa~afDL65-ykI+2gjE|DH$~kWw6TR>)5>nU-vKxT_xMKi= zGAr(&b`WTHe=~Rz%3>f1pFwBegH%4rHG>$#p}v_E=wSNn>nVcs$;vHkgcuT%a0)?& zUs)8+ggNsfibgB(c#DwffF^|58ba-a-L2fmmqB-?fU9z9BL;j z22(t+1EI*9#-aJ)A3R`%vkQ+MAVWbsM6Ei0kCR$|*su9xJU8S|&8JDq>4{#iJ3qK| zZOolCeu~8I)oNh_AIhR~0}+c8`cKUCFCfOA3y{N|K#41e95t#L8bV)jZg9uI9jHuC 
z-TQ$Pf!+rp{p#BXe^W>RJDDKBltQ0?a?Q$`fqW^p$NvrA-HW*EK#;JJQ4%mF`=wq( z{=Aj*Ufbbcl-`2kKbcdX^fwQ^MvE2RIp-X2%l8?~%frG?gXFv{C;&J`%f+Q(W<6uj z6y4#z8lWG2a1v^6C}CrPtk%1lARwfy*AYg|V})=c4*GMKfr32{Dcf;$s4Bo>HD}#tNSnbP@R`4yz?HgiWqgX6*kv=Ni$;w^+zxK zhyzKZ7=yZvLOxjL)T&Rt$*zD>YX8pWg9DZM;T3;>-+7%?asxG!X2Pg%DIh4-0vBRh z_Tw_b{AS=B;}hW}kdf!JP%#EI#=9UzQViTRfe7EwYmVkchprdWA_iIF0-&6BZ%PXq z#@0vg+m?A>ViP*Pq>Rb)`(5x?o~l}XbT&eb;baVY)sL%oh;80AV=N3*`678f-F3l? zM}9$847;2Vvj30#R0~Q%u4d3*Fvh&E0wphM9CGg?VExr4*hysaf~F4O*f_CmiO$i4 z>hfMjG&KHAmcgMv%M~u4<%>awpPV7EM=iU4S@R+qT!m;fyU=xe!2kuy@-s#Z!2$$- z?ysnkJo5Gl{U^e2vq+c*9z86Ejfw{KIw5Rl0krMf5U`8sF3|P!%?NH?Ufm@ZLRmtb zIb``hv`$9)ML{(2=|^;3F0l5eV73=V|Dmy5zMz3k2){ibL-@$_Iw5(*jo5h6`az@X zgwl&oRI3#pk2$J^TXZ3%+p&_lyJ+tV=toEc@M?jI87oK|k_eKbyk;Wn0vX|Uv-nQARLpoZ39)VViBv_>JDO-I;4Se-& z1}<-X;3OlsjD^Z*PzY0!3r&h79~36ryMvK3a*Tj2nKBSs&GaF0=Xu=7Xiw#w@Cbua ziaFaTH}1ElB@Z;7&<)ryQU&nLAI^W(QLSJQVpinhxuxV{FLDDSPxuXoh>SY{Snb<% zEnsR&r)d%@QIPfqpscJo2G+#>a)}ml1w$lA6G7g|v%2rvC~-#u{(SvQ!6egRgs4nF z2VoOzGef^iW=%kiW1GG7(A}=aju9iPVK0#Vw^W4s0bB!4hS_PO0O5cQ%D+u!u(HtH z*bv>^7#-C64c~Yl<9jn5ih#Hh^tn_^7^&a`&|p3++=0FdiNkwyKt1NvV0y+Ckmhs{ zyj~#*Yc@`--o>WR2qa&7_8mCe?!-brNL>OcpAkw8?^3P%Qu=Zpx%T#BC9GWUS&TIP zjJnCcZ}8KEJnFeaTvFuCM5O`$Ogh*tNRssvVh%xB+NQNn4p;MQD)o*dd?6-(T4A(C}Jqi-y)FmsV;=Gc>4KS7BrN zyeQ;Ne5~7<)X=q%+i;!QL=+52qG^Wy5eL*$L;JCm);+^Pd6lbB+Vrt{;CB}ZUzbZ#JyJmHCc{4|9VY5pkj9J#f-~WZ+#1Vv0Q;`oxQd9)e9eqj4C?0n=3c@?|yWjuzWKAs<;}0-9 zh1&t*a9uM{1Vz+1lzoyOF0cbBY>-L2BGh~)hQ{0d3o@(T3<7en;Yntq-+1A#u0WI* z>qTu66SV;5UpjL>PKf~oyhY^~A( zQwky+INMiij8D)oF-%ON>@K37iE)7y_5DZB^}e`I`@|_x7Mx--&7eeszC;6{W(N^9 zj&GhrjkJ3Vhv#VF+YzpRmI)E9e);>vF!RUuwraK6GU6v@?wzmKMR7s3zI;cjrkB!w4gjA$^YzkmzL4OB%0TF@7 z6rGhjnoZl{gnqR5J(}H2uEO9uC^;)^I1qR`cm~avLg(NyDGnW+zAA}Ci|kqTAvi3< za`7K6kC*@RdR0}%h3RSFE=q*R-aZzesnd%ig>ZGbh->(NA?o2eTP%d-q)8S!5c z$l%nH}Bcp0({*C74x>@{1O6#@!5UmB=M_Bw8zN z#|8}cY3~J95SHi|uP+XbRr6=`YhoK1aNvS?t!+PGo-gNuKNc9i#H2+J>vej^dTx+F zI?%~gVH1VQK| zgNuch7UdC!OeGG$NyXdOa_UhTiBk@=)oC|C;Fg_Up&qXbGI?V}XR*)u8NjZXTO>Kc`nX 
zHcz+1mi^Y9-9<~syC1yMJZ`>H;)Y?UW}OI*x^q9jnf@$-i|#%$az(2?5_Nt-y%x9p z<`ABYmNc6OW_oC|eFtq?!UZiIyMjTZT=cRiW&$ecQk{O9bmI;|ETXZ2J3^(S9YM)) zU+-+hh!-JPX<`o(1PHs9t3V$MtX#S$n&bYt5$87h8t3`M7g64(&NIMnK>5;(U<8 z(8Fu;kFl$%$p_AmtT+3co(nJcc6B#YSG{>$7Q7PsL%CT$>&Ds|Sz&K4`ep5Y z=axF$xN-jI5;v5gU+=j-b%OKjO{U8x)oE=V=il3UK8cFY^9tBZojS&2NU$P2knkEP zu7mSv@*L>E*{ZId9u%Zx;JU?E7(ohQ(nDlAV%~Gu5z;Q@WqI}>SRObxtku;w1pT=1 zEIa%_)t&rzaEt5j(9+LWw|T*{8cJ7X9s2revAD`!RXX^^y4k@-O0<6-Acu@6%nN zxVyUscL|W-1P$=Rg1bv_g3F`o)%Vx;-qfw>uIYR0R!xm`oj&)pe_(>PgYJIiEz?i? z$+DiuCJ{dV0Z!GQbtfpl(EBhM@jTC&e>2x*HdV5{On({(L`-tATS+}uFJHdHev~Wl z5TKIsXm62|mPanPq+xho68lz8`9o74IfIGuNS*duxn=}2UV%rYa)hG8-LUiTZf0kv zKVSAQ=X|V;JUvf!UqpY(_T1zgtFL5;9p$?WQYEE&Bid~wI-xwO8y??$U^7L0o59N4 zc99N0bZJD-m=!(B4?naoe!D(JLJ47XirdSIJK*C#FxQUA(b2Wm$SgaU-~S_;Gg0aA zY1W>t&L}N@LDzr9bHB?G0}07kV=aY^)jmhvFiCDo78LdcL8>h0RXDuOc@a8=ND`3q zDjqDhsiOPAdCO(=mH#DzSJaYg`vf7((;rd%;w>OrG4F6I5zO@x`Vktc$m7EBcTKU| z?9=miO#i?_xDE7I;!zX08U9O9Wpt(c2Z8p7W4oV3*K zHvfsx_F=$l>gwQ+nFzPJfB;9!Go$;|m25cmiJE zSLAvJ^~S%BG$2wY><1(nnzv0Sg?qw%pV+(n+CCEvhhOVsy;Vy5es^milXm+#b+y<0 zDEE&2vwi30V-`2{i?gPVi>uURj{AW{F?OI(wXo<`dZ+sjO8O5_;di4?Xk5>lG-H=p zBr&KY+h#ivNmcs9gz=}^FAn_ecO+&=gpG}j{deA+nWdSeF<=VzUQ05m)ev?3rMHF1 zP^@gp#Pkr7aDObX?wp)eS80=>dbbR7eX{T9uV_oR-4|7U{|RZkcWeW_s3lo@eZ*1V zpe|#HFO4CNR!#p%jVO9Xfmn)A4H!XL6O`Vo&60MQb(>^3N(F)V9g`@<(sK%O15S@U*d$g#<<5 zp)4u-pS|NW2J%DipBe8o%mieFxKx&xd1ur#O2;5Lq}*#_l4vF*JlYtVb`Rr(%|Wwo zcpn%tkHosP>*x;}xW=&=Xr{jkp-mc9#KoXc{PLdm@r^(EA;R}6n(wf}>#XI&!BWj- z_|yLR)*3h7$e`?$=rGUB+xXPjaG{!c)>ElkR7keEtLMkD(C?NXLrs787k#ZPyp7Gi z_?vvs#5<=*X*j&IK%puMs>G^mKhF24OvcD$+FCq0u7gVOXZG}o{BB~APcDs`QAoFv zcLRXNNc~KUBZ-AOs|?pv`yXw7gz2=u9=93k@AV1lobJtQeY0s>*!SgTW=@eJ;7sJe z3myrMSpD5!zj}M9?c~G7K3ORqZr7T(Qs3}031+81?0c*Kl*qfExy z7BO*mhgVsw@0tJUF2&zdi)w3YIC1ox1RV!8xfZf1UUrkScJw{70hm^j_0yJ(wkw+JrWbYJe+^w@-RCT05w{f%82(r_y@(Gd zBxW2;;kzg1cELUW3a4kQDp)KrIHbt5Hql!m8dO0%em=#@HxoMj|NW%y2(9|7VnUTm*B$oAgwB5@6Q$g6Kcz7*eOVu>XilWH}252?ASGPc^W 
zM9oXPvfa4c1z!sjYVcu2)8}|!(^*7oZTl|-(G6+G)~3bY8OdtLRuX7br`;JLVu~1u z7*u~)_af!HP`XsA{nU9bRQ1>K)DgjK%YR|3Z*p!@a|-|bCnkP#?UMEs-ucfY?B>cP zP1V%9wGjmy#rY+n9e@7iV(M?y2slGePCT@TO6UZIB%S9nif+Zb(!lI(fA(deT4iS5 zxVtVS#V@IGmsBZ)f7sUViV0le1&&ePJ6J9*vBy~FU)8|;!xn_x{6oYg5YzZ!7A?*l zR_kA)u3O%#SKSAZTyaCxGb9N-sg0cJUDg&X6l0jOgkPk0{OP&+W4XrV0O@ymBMNAE zyt)w6n{I50%A~5K;_Z*~%rO9l+6#7>VJywB}hgdr2$jL$2)eGZxr5# zcU5q={f2x@gMtdw%hpa8SkMojg zB0>4E$==P=AJmzLLP7xj+J~lR%2ya7l>mYavrRt6BU*kyo-icX0m?N3Wj@5WiKy=w zb(}w3iCP+}-m|;~3fmG8A(=YF5>i|xHm%HjIH8a=Bj|7P?YQ`9W-yZRgTv=a@Rd1b zDL5m75!H$V)ivq*SumYh&<6e-gYid2&Swm%h&tuMWGaU$H;XWCQFi=~ZBMlJ_4$9x zdlJIFe2JK-3PO&w_mhD0=sBgBnhF0nqd%N<64)Gn-!U9f6j9OxxxK%m9@;B<&?aD4t?jGpq zR}4{8N#F89s6K>!z4dZjuVfb%MRS*yvuxU2BB)Gks8udgXUIeNNm$9t7hWtm6`Ijv zXVKup`JgMgmJ${eU19u z$WDfuis0--aCad6W^u!ECu*&CoMbYsAIn==U{$B8l~UKsPMHcxr_O@PZiEC>w6P=E zz23e#j?N|E_y*78K;R|-WSp^`6uR>A4gXMgNH3=x+X`gNi_AJNJYNQ$$ya(+0R3QgQ{9M=>uB8I`bJ3C&Od z1xenPyvi0kmB^!_@fS|eNP2#TBBq1L{$(x=mi8tDK#cAm44LfMo0UFokE*FVwyz+a zD&5a+#519i`!Y35Sm-;5C__k0i>$Oh<$3q1-#yY~du^HKI!(tcsF4UP^4wl6_2ID& z>hfBw=xSTHuG*cx39@jiEaMqdq!|;zF7+ApsOdK35B^S+oRhHDsAhE}A*I-j3alpA z8B?NXkK1UKE0fc@v;8v$A zRx&G&@~c%1djiz%s^3cgQ1a{O;arrE#&bMNzbe_d&$r@_DXZ7W^6g^zV;0>exq~%G>X_?qjoM|C}{6< z{H5oJ5@Yd4wa<3=2vQkSNtGLSf((MOl0M}kVpv2r*4GkDbts*E#eG7G&81stDUl@D zXFd{AB7KazxLcS=sPW=HcWC6jTPhA*N8x+uX;WsOEiTrE5_o&_ z6PZ^UgScwdu54w@>M7$`Fy;@2Cx)9VZk!qP5Od#8qVYmiP2B){zDbcDTL`@v(|iyzwkP$NPbY_;X!kKs46q;uv2 zvZNV2^c#^4eHtG|+CCm6Y}-P>mR^bj*%x>pMi6+-NDYtwIySQawyqwWCjKyd*n+pi z&;V?6uca!E7MiI}N)}zF@Q@KC*~ey1+OHB6uRqH^?0#B6&Ob!1nAzFC3r<#_@U_il|4Ar4XCy6% zrv;n!n4w* z;tp|FT^)O-AB?xl?km?V#1rcoqTC+Db;FUis<8=|^d9K=cx(+3D)7KX6P`Bysgz3P z0pQPMrq0vY??R+RLm+t!Tm=brQZ-o2@vyorfzajpkj6IQ2F_HJOM=_NS?ia_B4Aw( zf9po@?0O1fu6347@#}KE4EOe;i&Pk;2ir7_UP`VV&h#`P^Y1#ssi0<+6?`A~PjT2v zW20Lu~4gT5&SfqO!PKd4VqLKerQFvv%0&J z!lCStm;0i%^&5=_AMJ~aL-dhvgtV#55`J@lflS7aOeP4c_U1~438;u*)OT8`Uyc_s zh072XnBg$l3T1NuQ1^nLji%en0}!%@kxGDIZiTs}f4s!}L)e}uCoWyKm;}A|yjL8hgDY?PW 
z4nl77)dVCLLd3po&;5+z)f+ypuKh0#BGiM`<-zfiJLCuAq1pBy; z;e?LJR+sS6-PN;xH|J(^6kzE9r3{a%WXHtJ%sj$VNw=Pn25j()$YU0H4HImFWoL`L z63O)#?LJ6BkE0W;NFM;9yHO=gL53Z6x9BnCu-egHB0JDfATONlut4z zmsD>iHf8eP-Bm!0>myP?P-DJ|U{CmphPIP`5++E?*Wd~Ei$;+|5!~P20!;SK=nB{) zwazaG*xyGj^^!29KfVj0K%Tm~)mZz-Vim#}o2Yiv`aKi^?I!($X)?-?4_}@mgdO}t zlWJCLS2Jc-m@oi5cxaFX$I_a^3nA~uoOF;f>$FtlZ!{jgBeLFHY}4zMPz+y8z{?&X z-yWBtRPhUy-+H)q0t4aAH90f*iDYzH$ z4X}U(?m|HeFxrGc3`hZ{SRnYPD7ycK%yjrRA?Hxbjp7ZEWNa$^+78_${;NH?!dvwe za$(&rFp+f8!pQM`aXbt$bg{7;4#~sJq8TWL!O^&g47HH!hl%*u5nmMU0_u>&DgHBx zVuoGAi9ktp;PU7TTeFE>_U)-rxcYlox$AFj(1JhoR>-^!tEodOQmLt+ssRAEx-VG0 ze~S@7iLs+=T6`2nztKT}Z(^``7MMFVVCDv+kpDd!D!FMiJCp+Lm?Z=lRna^Lh970i zM}Y-MOsAt=l)OpSP;kg<+i?_XBQg*da>fyC_6MJUDE6B`;4a{-7)E5~U^`G(z%1zC z<{vGdJDH34TK5St_aFl#u{W*7{fDaxcD1!OiJ^({d*{ndXoE9V%09BsbCXi|*suDcNC+iPv z)0!GiWIIq%Os7@_&GZk_4j<2E1{P!N=83H{RG6|6&D0Nd^9LeCRiR<(&%3bgZFg1LtwE?$tC&eM*fJ)<}QnR5#8IiqG_ z9C6*AIouy_KZxZAPvJMTlmp@1m4g$5#Yw;VR}&LMwuM$;C_?}%(v4PtEgt-uW3R~? zzcoToxz_MWq1P}yLKz62;8dlXqZdmKgxn4?d3jXAN>nu^{^zp`K9ovcgR9V!!)XR0 zw=wBJRCm%qH`bwB>%%WA7)xK>xuisD&Q1aCME_8R?jrQYg=1oOfJUV(#&EFal+OTL zFcX|EKuv$-FAU#k1p%H1pY%Lr1Ea3x@-TPG-@Xjq549aF3S=rJ9|~Yu?Ex)LVOv5! 
z9l^8YAT6f+htZqo2ykU2=Gd};;gL;I_w9k?!r#jf2FP7r+xv-K5HiNO#VIs=9QaHf z919Sb0drxy<6w~w_2$AHga3FNgQM!1ReWq12B->vY*$X#F0<$dlL?WMiyRR^vNYd(HJiK` ztRjB<{#6owa{l{?GqiPLXbg;s)xK7Tuqm_!oWeT=hBqj2IY|_rR>ON-T!R`cE!l}( zcCEi6Bz`F>`wjj}Jd6VNeQ_W{mGk9=;qnk~p1H#Yw`&36^8mE;B_L29jRUI<29&WO zhK0w$%_{?^IC$R61e_f}s%a(=RA@mBI5vu12{Cc;Q|YUJPCn+{*yX1K-JQ@a{3$RY;6DxsF2=|25Wi%jM88OHhL_)qzSG7ck00e$%c6peJw^xJMp@ zhGek$B~i_I?Ey?6I&i*@9$Ed;M|ET2t4KzZ^Lkk{FaOr|)C_##AvPU7egtH{)G-h@ zz|n$jN!cqfHnkMs0OJopdp6R~y>D5<(Aeb#j9!r}A(BQs?gZ zXQei;^u3?qy~ubBSVk-C_)E`l#PIu+Q^sBl5}~T8uWzQ+0VL3_(XPF8bp@()-h0vk z`^+*^$@C>^c^aTE=mQ7{lI8V)ZgkyEcFZ2_=l)!CYFjzocj7;AniM?9b^m{yv~+Lp zIpGQ&5qXU`zH!zv(hTBx9)9btfUnv0#h>a`mrTUGQ7506F1SeNOWDPZFP?12iLjb9 zp8^M(UmU}Q_4xCq6F)k!@za3LvLkf#yg$AwsF^%tpRi|se4(z_4eQ_f(=D|jid{H# zipB_pr(kx_T=-}Lo2Xn#tF^=AIhfPZD(8nQ#MWV$QkMOd#r?UV)8(B#=@30w>wn}X zEz(Ii5Q-id&p1fHW=lX(4wAt$<*Yd(FrB~)%$(X=tx`QI!qaL*jASvEo`AWQ#o}-; zmxzR$kE>QF=A6aK!}tuQATmKMA%X|q?sYT}+iO+1;}_=E_ET_lf3*po!@Br1_zo>u z{Obv(g!k|n8k%FbQ1Z_j4V~Jy#OIgLbevJr=crxWtie2IMq{%rJUDji;$Km7Q&pju z3JeletKYugtqaX_8+I}pi5@prM5D3FDEf{W-$eESzX@ubq3FqrRg`p>lHDavp?zwf z{>Z6bh6?BKtKtykJynRX)I2Xi8t>X?9v*6ftz`?t8%V=&6GEbvp_9%f=DrnJyX=ib z5P0<9bhr70j#dhX^bvUn(%riHEzuSOs#2yEut zKxm^xl^BF#SSIaS>i7k%C}niqg+e2kZs+bGh(&)Wb#%IdO6iKGivM)#D5W38%Fvpk zQeD~>u9(ionSn$ii7u3KB;2dT(sTgT1fZI!*#L?sFi4pYFj0kHF(@DwtLcT>C$bLE zB9qW_SvPFaBNC(m`Pu<3kA0^dR27U7ETjV<8SU@uBc`6XI}zL-_J27TP^)buFcAY> z^1ewNQS~OTr=Y`!Yx))fkg(Ed^?Rj{K&`zi{>?D-Hf%S9`@*3nBn@)}PUH39`>Ujf z5Q`XmGE}Z;3T0dxTyil^GCfVzBGb_fT0h6RcDx!nRTzT!Wh@RQkIOf?ajX;`nO+J6 ziGR{4yy$E$iu#Hv7IR)S!;OzOnLeQ6O&H@l-3$5whQG|fS?X#T2eM2haq-#|SX9db zgnh04FEo4n%dbBRA$m)NfNbcDX*iFuVhdvkB{G77m?~TlhEgWT&5reTy2?RTmiL(RUc*3Tnb4kI0REdRd^8KH=>KdrQ z!D7np4GloEeRzHz0iiPn`nsD>6T)OEddYw_86?xS8f?Iw9d5V0R zGU%*pISNrbS^3nY{be`kk2YdZlit3^KA7=8>9G<3W%Ign`AN2s$B&i?zFJZvci9X$ zfoF|saiL{v1C14OwQ9ki?B}2K-{ztxoX!N_8%O@TMe)|F<%ya9#9PPSn+fsS?dF{% zx7SGR##3+0CGlE?9QV~L*%lN>e)qya{Z?L^FHhx~q5I=TuZ+0)Yva0asrH|h9S;iB 
z^Co;IUL)1F8gklB3e;OU%5iU0$^jF02hX#WZGx=J;(6*hvtJ&{HLTqAYXXI|26F#Q zisw~v*;rkxe5=W94t~+76m-|F4P+X$R;Cku(~NdhrW1J69CJ{nYYJpaKM+rGYye?xji*(nJT%txD0wc*k77c$j|9HDW;^iYMPeLTez( zKdVV{%U4F6biqKEmsyIS{Ttw60l1R8JmGsF(XWYIvB_6ENVA~7-}|7^zGn1OxkSsp zrtecZ^mV+F<`=OVg|j=8zYjIayKnRG{OmDhcqm=v(HsLtO_PYxXQR)i`>UjX7T*oQ zNxEoebuA)WJr}!*=Nt0rXH2Uik*4v@&ehJ<{wWemN0(n&Tcg2GRMi7v+J~cSj4VNP zCX<3cuOlN@^M-dWy+!h`SAEVzy#Du5P?S>42cDRM7v>SF0lFt&582^UkITm@{|(bf zFm$311M|`xLhM8Lq3y~)3Dn)BINXqJ^9T~iEQBBu;mT^%JhIN;duu)#th0ct*SB#6 z$0hw*hux%1Ciga5)~Yw__f5Kbi)??R;)fw=BU5B$7Vyht;5WQJW#(!R zGl&eM#%soNx&%k)t@ah+2DZVU6+)apCb~e zE=E{>!A_sg_|v5PcMz>(6@7F*o9AYbm{q}@9?L>{1=s7py>>T2J^(5(5gSg1E#s@} z4r2!MjA3tOky-`H4-!-sDK>sUj--{jklzUkrLch@3xVdCtwb{%GnkxJ zultJ*7@_E!6inE1+4j=S$Yl%oX)xF-2K`ix41Sf(kzuw-I=`9cf8SXza`#CA0(dx9 zmvd7_Yd6p!1>^WOU5L)JmU@a#N(UPytLudYReyaba<(C-gnkpoB3)^;=fut*Y+5Ws zR^V$k1uG?5$k*}KXXHBw!!p)A_HcFXVDcOkGv)Dh2u(@C!N0!tab#d|MjFv>+N>^; z8*YMsyu;$3ywwuVstdF-+RQK;)KrJJVKgditaO&l!abyukv+b%wy-^YW_AB4HAuLkZ=Ay|IRAAIc7*F+f7r^# z#Kg4xe;-WJKT~?OG11W#jKzF_Gy%`m%;-5z&PQ*Fl(MMILrNAEOn^?YcgSExbsZ)2 zsMTx*qC=A>5T3-ubV;v~=D}rP}`BIUB@#u~gT7p*i*D z9NLjH?sBi2-|l49bIO(PY;5ICNnhRZ=AF+WY3%wl|5Y}|p@ZA!+;-G(+h9BTd}7(D z`dZ*jI9)$w>1<}cakWFc+KcXPqC?+k-$Aj4n@qMT9$!az-nC|aUo9oArHKaoH%fxH z|5kb__!lu83YFcj@yo_v5P>N<;P4POm|GVj5ySPJ+q4sU6!L4dfJTJDt_X7<4$iZ7j8P_N(hj>fdO@aMn(@Jn7-^2XZbS?jOq0 zBL8_yOByfP+<2(7M8j82fNQ8|d5riQTCF^pqGy`XX- zY`+h3lM&mL^e&6jUWG+aPwphUiBai#TRY8L9u%M>{~FlwPIeQq)gV$4UK^p>l+ji{ z8-?)Ze|azafT6XcXZTo5q zo5Lv%8~Pq-&%3X(ag#sz8ma$z%g1S}KpWR>*zeU2H&@vBsa#9G-Gb|_1!aQN{biPk ze?mCDC=JhC&s2R@ z;}|rt$Rg}22DHM9EG(ns)MX?jW`I)f61H$^NZ~~U@aCpcI-% z?DC)8lZv+*te_3!Hp;OiLoJFBG_KXO*M!Sjwg0Ec=40!3LfEB$sqimps#M44&WaZj z@Lc)l69|4-R3#lVos>MrI^3fJq2#`&Wn++mZo*cHh(QYYNjNmn`-4$~mt3(W( zn!;aR`eMjNv5g>zO|U{L%w6PGzgNHx3u)PH&AEOkN+!)={Ub_$U@eP7+~i7BK|yMe zXr3PkIju_g@FB}?x1aXDOJK#Pz_X!eCJN_mX2aU#^<<8Y0}njznQ+Xql7I4u0NJdx 
zy5sTJ4=?OVtYI7Z=!2AuMIvXF#ISIV-#V+o&cREHESMS=B~hiroCoqyL=0c9J#@VPnp923Hr>&4@o+*h$2ym;8BfEJqwU7Lz4IR)46DFZz%!U zTEmmNyMbf)4+1~1U4`8s)-**03=XDAk*@BMk&8F6tK1b&5dUFyloV++>~HCcmkl&U z(GG_)r)!r%Uy>rrO(|z6eG7eRdR-!ov6@O&O%XQNEf5h&QP(LG0)U=D)EosrC=(u3 z7!So;B6fcY$T5!8(jY*p1nEmxT?Leo(rHz5T>DNh2eyO#Zjt;Xkh!kKOby{;KJD*V zC6oWiZ4!8*NjtIsgcB{t(^Gq$RN=_lYj|OJ3e=S0jk|X*>0>UPknX)+yv*wY8lY4^g0EMB;gsH;*$g5Odi7teNmk+4qiD+x0UGR3R>hiaf*zkn z7y?0HzC2e9y$KW(h-${1?ozSV&)dEoPb1uE9$b5PdYVM8T|m$G6vK70FE@0DscN1b zqE5!hAi2Fjs9h1KGfgFkHC!a`cVzZpa5aJ{hO0LlY374ud$ichG5!sC1J(%-;=1F8 zd>I25_H$?$fm@P(%k_!{7BCV3(nR+9ea)tZBFZ11H#61&Qf_fW$1bC3&SMCDM@evA z3kTxFAw8W+IRdontId8K+z@RjPJ^ZJ(Y1pE8)`g%5Gt@Ke{T4CbpjuE-%mbu6&I8w zwnXsxX}eVH&F(@tkq+DJ=3#4vu-|90U~iyr%alyy>G7FBElJmW_2{|)dhVcxr?dhN zmKh%Vs;=E6VB-d?=DknxQA-+hVyWZfx9Vimx)x_?Fw4l6*Dfm|i0R3eKW`6Z8sP^t z0@-&1e&dotbm62ZK7dz?a?u>3R2%dy$H30q91>W$ub2Eg;{e`SW#heZ1}9n(DoBPT zs<4rP<@v{5!=p&i&wp{hlMRMrG?uKYyW@ zD&2XPAP0whwN4qvU4*<5VG;Nr>IVOWXJ0hYFH?;RQGy!-OG zf5*lx`u)1k14Z+O@E(NQ@?M$rH#%0g^Isbz7QdgE(@LV<l9Ql!ujvGY zlWruGv1#dbKb0Ejg7VMzVz@p+#r>=+)2uNGC1eV1own6kPZY!5P)I8p@A1ql_SV9V zNCEC5sHqjLiH+W9f!;suk+Gs;-WtzF5-0xD0k~2IirUzyaz`^-e^ThCzHlb$c_|Wu zL=?#>8*E0X;jX^eL!~B_5IGLpMmWH$=^Wb?$}>!#EUrJM_`hB`=YLGzW5THI9W=V& z6lpJTB06yCuW{Tc{yR)=6@U`y;8imH(<#IRlQ+AYaUJ%SqBr07wRU&8eSd{?3Lg_; z8$-YJ%v&W<<`yxGQzjqpC;NzT+~15fDYS1ZLh*Mu?xCM5n>(#ZHs44xcbr-XC%6fF z;ZnboV&0dH77}gp+eLJiry@Vedgtv-GXN7;EQV7OU~9?FE3bGL_t#|y$M}vLhyEM9 zUA0_Wfm@Dg3HjA7=of}oe7$<6`-Htw$N=x#wi(`@oG48ivjX8eeL$QhlH&J6Sk z#les}MPzA|hjv(9xvAo5jRzTS2#$EsBZ|lEGWqyig8JmipKZj*Zywi(u4+)R4c)XZ zL@|A3+ODdqX&kPcdkK+4jKrJ(q6%}6f5fmU_;3=ycw9*hCagDy710|$a(qwEf(H?& z?3j;&feDd=d>^^j2MgV>)Ug-qKpPQ0K&5xS1skp6neC;ARW=itwd4(-I6c)~AhrC< z8TLYtc%Sm15Bn02Hgd56OQDSa8{owP&SkjZJe$%c;Kg%M^U@~n7Gf*sDM?bXiJwQT zq_+1(2d$jyzoVPGX%JV347Q!O>4Wqz9vWYE1JSK)jr;gRA?J?!iOFchVx8iN#WK<; zte_i%Z#1}n>K0*xt6Bsb58f6?D(p=in2>w-&~$_z5FA0}@*`Sgi2)i#nI;*`6d9n) ze^OLv^9JAvw0dU~p$e4Svdp+a8_MS^Z+-T!OFl@l;d|%dKrX=Wb7W7N4-9!hy_vxC zSh&zv7;H!mDuT+ioHdH|t8tc2u 
z4lpS0feMC#5Deudp0a--1C;t9(`UiFq89;+s~s2&r#o|2Af~&iJ>~23SVE|W%3Jf) z!@HKbKhYHZIS}K%Qz$Rs8LfSu6({x$yt)~qx!Vywg%0cZ4X%kC1KP~yHUJ92h~#E` zg_KTQLNE{Gx}kaqe6nq%!XdW<@CzAsH1P|frm_|v6tayDT>{0&4#@y;Dq+H=(uW`g z+&-YI8_2o^YU=dXowJDBDfO6%yq!B2S(e>2ezK-sfh(fLNN@RV#g2bO0i6;NqpLeI zxomh=CRtl((TCrYI8Uxg!;3^Ex={b&ep8*{4D}bWzTo`bv|O#QhTsFsYCrWfiz=P} ze$bBByTYHpOr#XlT;vC+v|0czSjO3M311R5ad-NMfwW`>Fxdv50+SUK^@(N3x%S_? zDtKD!9sti`4$yT)40H1YjNHAszyfGSg#$qCe=T7O`vOq03?8T}L<{%xRqxROUo**; zcYz=g^e1RlPmJLE*0pATe_P+SNC%aStE@N)KtkaHypDOgPk5`Fg1v+W>3ab&9wozR`PZEoL#A~B`x=c?Zs;$1xCCmog0l_EStcp1(gcw7%Y#pq<-{y7G_8A zq>O=nk=}r7nx-ev(d%>de?;n<$t*DEv zJOR%qi|K`rDJGpCTv05%73FM6`85(UEi<~5ik0e$Zdfg|KP9nw! zby6Yh6B3Ros(#7!Y7Gx~19%z=Y#U7iNrCrAqaHbO3p5n(;eTUu@oaTtRpZc(f@tH{vYqk{HgbR)=ELNLUdUWAX!;(>x zm4d__$CM?~TONYVDPqx6+i9`zl79N|pZ!j*^|ZnKJ}X7pMn{!iz3=gbjFqcd{I`tA_ zT1CPQlUG4wWAJNd#g9y(jMeU;aBgqkzwvHZSf#Uy#nuzusp4`#5|^Y2A4sI>y7qiJ zfz@5{X(mD;#aIuA0%x;U@jnFX+S^}o{M-=n?>wW~Nvq3Rcs!VnbN)X2pJAFR>eXz{|ni^;VZCR2jFA&D&g`~WF5 z7Y%+`x{W3LMQRi^f$4E7S|oEH4Dvx33ML0nogo8HQT&?8g3v@mSw58&gQju#*JXM|&s3-wv}`8;eH9Ux@tBVkp^ZVO-T+`N2H}ZA zO#~6;V3<(~%*}WkRBo$e82gm5Ye+Dgm|~=|LqKwsV=qEm#~%QY3LtuN8%fq%`4&84 zOG@!Diw-lyG&D;C3fj?F_MZFu3#Xv;1KCn&ntOo#hGNiA(%zF{6_0@}Vh#rxSJ+CW z?5AzLv|Zwc#PQ!8+jje6fbGFGIP}PDfvHyOZQ%MnX!1ss4Hm$}Xd4J@kp@2M|5Mhd zKxx_~L2T7zEIX4FD#YSa(5TaRKqI|F3{C2oPWxsO?djljy3*x586Vyys~k$pY7mb1 zD_rg)E_Y2ip7GvQ7j^9CmX=OtnA0bo__t!@>6q<5ZIvdzDL5DKeejd;UG@04U+#&G z)z*Z&NP7mVV-~xO7oL2L))y3g)9@8Ple5dXSBn zH9lDYF- zyI|OVg*l#pze)o;ekFzfFxjPi&iw>FPhB)9D=Qr*jA8ug6|7d%rD`E9d89q~&svA( z46}1F(snkKVE5!J=L)csCizdlcmM@TV8R0OoJmmWNebb znW47AGHEDz%A#LT71n86=DY0PwcNAN&|s}+wMtp&uD6|5&{VPC!R-L)l_ugr@Xg`+ zSn5%;(u1Pa$Ld``|2$);BEat5O}8ch`S>km zru|HJTK--gv=zygD&dn4op#~;cirVkbiVJG7L~$+Spc=CO;^`9+$RVd$L|hE$(e)ni zR3!hA3cL((qw-JO7+^%XyiAJUfRx3#4mXD-90YxoOQsmP{#3i-y94G{MuaMCiv;2TSo z^#!(rXAv5oZws@w>ByDDM8iE!cR=Ab#cEomE(B3^a!K zizP@xnhSGImjGp@Pyv;Jie;D`b(Wi@hsO|m!~8OBi!v$P%qRaykgmHK-q6;Anau6G 
zZwp1xcuokK=4WO23nr!@s{TkdR5^^P%Me<+87Mazz+1P*=0{o7O(I=CyWxMownS#H zNfxeIW?Pj0YZhTa-`i%vw(w>6?Pi#2MP-y~wT+6sV`dn7WF}G*^D(PzC$sEGFg@bj zMHf559S!?vTWNqjiLC#Zxhq+>^mYRox{0~be;h9}>}quLAF~6Z{s}LfE0)?(HO8C$ zkPEf?!O%09Q5NN#gfx3)F#9jDY|l(|c%VIfN2qQii(!@!K9630!gkVjo8Tmm{vdQJ zbjn5UD3AV6XjN#Hi&7jyR@R_D0&57z50J!fuA1`LgSh&2A9cnZ1%@4zj=y5@IEWgX zAh=!DZOH2dVgZ5$338LR5WLlk-LOC~>J`HG;M#7>hpR!9g`d**Y<*1%iaCXH=Om*@ z5NrxM2WJNdy~%*4fTnDn<>km!12KISF?~rIdBqf1jaPtlDZRW(@@DxamgB$4xYf`N z@_(Yo+})6`zLb17^Zu)mF&96R2DM$-pHMXemr=w}#eV(qUqz~UyHzEweZBnlIpUkBlD0rh)5N??{@PovH|C7MM$TiTVK7yzUb(Xp(#pA;ViyycCuD zg3KUuP<}n<{+%E%RjNTmEJ25N%HDKZwWKe%kFO+(P7~J>uO;;|)s=V9f+VQ6yHjxn zmRa*++4^An?H`Q~<@a5d@6i_&DfA-{oxX3yqkmqX)P`Ly{mtgeuNiQ3|K(MxsQjx} z9?C6*dvo15R~<+vzDKK1vbzWR>|F%w4Hhdzpiqn9AomtYgEeS8Na7+3TPr^oV#-Y1 zyBy9D6Y_Cso6n8TLC6HatB8SwY;3B^y?* zeNq*?TlCv#am0+aWf*M((zy!NG^tbt4w(R-XGmHYK(nKU^(#h-)+J=uOORV8M!K#+gqkFfN}K7K1x%??tj zBP|5$181HO>wIZ;aSNZ{b_IagKdQx|I^l9-h}S+|mwf#U-nFi6rN*B&8+ZInw zOfC#mZyhK+Dj;zHL?oQe2QWwX=a-g^(IfV9q2v* zpLo9^!WOdoW|^j>lj0?H zQW7sq^hUPJk2pV5y~8@_$geF?(kW6xS!LUS=@3ZoR=H~8;WX!Pxh?i%DZ{JH*x;Lt zBuC~H4cDI_Ek8|q9C)Ra#PgETqQGKJ@o1`|Bb@lR(z5Lml|;ABX^#h`Z-VRR(sC#t zW1z)Ha~Li68!XY}ypjPkjfdj$QQw8a+0{~vlclHwYUtez_Dv7J#0Fs<-E1_3Cg5Xtw zkdL8A2A%w&-HzEtSf_~kL|8gPx9`N z8atXgxGbbBrppBMSnz(MD6t}y2v4o#yS3s-W_ImiMa~NcQfg^Pfoo=jy*PES|IAAR zBRt5xlB^B?3$93w&;NJ(Uob|cq6TS9S@zPnGv7b2{Es#MPq#SwyZ_4V zD5{RKtqJ5>{gzr_0yB^8dFrluc!+jgv{Tqr zP0PQt-J!aRhI*z)Xn-~xzsW@tNw)b5Eox)q?6=c!0E9g^uF!{&=1b*7z#D)4)=*X( zYdc47q(QIO%GGxk$VUe`Og!cmR$}#+narzL6(PD0-K$OGz*{fKx?0Q z-~>D>YpFVcoy*SQM=Wa4xSA>ul&{ZXs+5E-z!8K3X6)RvbS3zVO1X({tSb#fGqVh3 z<0PRNT?N}p72&m;&zYuKDwy^02Ys+sR|1S%3n)!;pbmQ&4R9-^nk|9@zE$_F`82;F zqtl$^H7{-(BJUpOHOtLJx0*w%)&)OM)i!CJjr^Mo`9Dm319T)&yKTp|ZQIU-6Wg|J z+qP|66HRQ}wllHw^6tC;y|34|x~jT*tzK1q&Z!Uk+uN38@OSuC`p=#TNisBrJ1#VdKD#!%wzV`Tx~$ZV zcL?glkYmnl=0)G*uCGG36F%GCGeL}YeH)s{(1Y@to85&?^l;Sh#48!aN=S<*lv*9K z%Fnk-va>O{a?@6@GP7~h(U+tiJI)i6oWdTaRkWwvuRMQK)VS5|hr?#UW=*4UV$opH 
z;8kyXM4nvJQZ5=ibo1O-JWea=GhMfTI`xJ1`AlwmKvVVK^x%>H=?)}v27T!6_7+R+ zsHxe2{(_P%tZ~aN9L2|8XwE5hXxQeG4}Yk>Xnxz)UW;rQM=~Ty{<&;PTqfKj88jSD z9D2@Ld~Lf4tCaiwF^>vIT^2`XYi3m;Z@1PH^V_SE4N0B@%kQghi(apr?&s~FH?Pb8 zw|(9gnTcbN4p48F^c2t4bbYDdsX-gu&xOeG zy=v(j1DpGVztKBl?!ikWQJ}n?c6zzbd+YHSk2&2)rFZN);_$xz#&sSe3tG+O*AJss zN(~MEp!hsL3>~h2jU%T4=frc*99n4SiZyZN+2ebQBXY`#efNYh@WXjI0?%5=o6U?7lw-4p$MVBRGW(EJ@ z5F3cBG7@()5*x6I^e_Kmi)t%PrJ71+7FAC*(fDsmiDLU{9se(L!B{Fg_z4260sr6m z2xL4vXE5UrJ66(f7Oq0|Q7ZEfy+KFvYYt>)AfNA(VBapN(;1>3sMHTTXO#~#=&hGu z`wHI#a6{i>!UFzX=TZ0#wRs2zm|-xbwmjb|P?L;87;;mb3{@d?6@;$Jpggdw9!>{h zBI%sj|4=qkq%E@#Q0j0$m4%_2wO@qs^UZ zGkRI7e@P|e%aaX3a1^Z<3A**Xj>oWRDaDHHF~7k5<%*+#KDPpA#+b}(sH0+34k9L7 zk5%|gTo^{yEV98|V#rbWoX3RxmY`4PLGJK=nGd+h_RdMP;7&d9_`|Zal)qt7>$}7g z7s9K9I+IJ(?UW8TgcezaVbSfh>H!RQHI%Z=1?viY0lExhdV_14DbG^ywEyF*rZV># zw@x**TN_&Tl)hWp#@=48FdYyZo$e?m4HnVs;1Hs{ zn9HTYrJ`P+o?#-_s8SlQ>Bq;Mg_IEnT(l#(4rwmtepNg6vT+#Tl?S0&~b~w z$W(sZ*n!C*pfn;2-hmWW6{Q#;zyF;ekI-wU`1qUGcajP2#i;ZPa&7LH&>NGAInpRl z{?Ip*IM%j#Y$eBjz%l2t;EwTmT=Mxhx%fVx_|v&^T|RPV)PhXeSzAC#+xr zLSgN6tj>560g#}&`GG;Wu-4qh?*5`wV7QX?)H@|%wTy!i6wcNBq z@j;k_nbsAF0W&z%yb1lV)ar*@{qPLrqk!n2QcQWvVS4BhMry=AEzU|;0Vrg<;#DO z?TL5b85vmq06;IK5+)&HsG%ULs1a&nqw)p>sy_x9HUb(MCKMDT9(r^?Q34|-Vz_Vs zBO@U#jEDj0P(0}vFxnj^$Y`YMNboQML=NJ=$44oU>&6BG23iqKdS5+}^Kjs&dO=G; zc&cg<{t+XHY;!()PLc8VX8h@fghq1)L3Q`TwV6SpNk8ziY#&Erpd-UlQ`T_fte+Ju z$ONjcRUy!)C0cP{s*V|zBB8VDGR_7J?(d?b5t{0C;DHZm$*EOJgc3qv1B|$qU!`@} zA9z;hGc?BbR>lluLF*tjlkzl9@-EZ3w}5K(34*?d~J`xjZ&}>Hr3QpA2upUkxghiMz#zo<Q&P$F(iFZgIhN z%WT33wfM62liKa`4TJc`d0p`x3hczH^dnRjKzq2u4ueT{DN1%RCkmH=CQt4<1IEJ< z&nWMrJFaS2dML2@U&>y*mGU6gK>73k08clKAwYM8;M(q-{(_XpT7FQJ+WSEI0kHL1 z$~`-`oP*NJAe2?y4_>zN>;0`~>-qs(6SQVZ^V$gN4DASlK8#tO*v>$Q7+JZzprUPwLH}G> zj2FCoRn(?t^$0LuSJ*bJoe5_N*Rz&?w-UtMIq!0&B&z+j z`x5~T?>+0!FVh{=)TU~%6y9!=B>9>(F(#6YPq{sz$O%9Z#LL-z=w{OAg4$3iJx~-j z<*!OuWlD}BFunWYA`^pat@Qu`yW7)X>3w7sp0|np%hXO zsg#*t7!Mvo@f}76qfV_+Y0=}N>}rpUY}N+PqNN>8bs@7c5X(k7c2S+>M|BikbjEM4 zKW@p+2Q_1@5l$i0VsnpS91#I~PWQ 
z4JrQZZ-GyXdg`)v=%Fyjz>X1!(s9@F}_-+wo%Tz(-Pj;N-NW;i5W*e z@!lZJBRwgEQi=sQ8tTeQXTveP)H?Hx=`JFBU5QPApJcNmfMAJ*(7ssX9cus+Fi4j` zXT!H2N9El@(-vwZD)w3f!Xik9E1$*LJ%Wu79e~t1^18=peu(|cIQQ80}1pau*+dG7lAnXJNiwG(H!U>c4Slw*G3fpR+9 zqXg=hq0mE}TT2mRm-dS{H5lsCC&;Y|m9kxtzgM)PZx#|>k26%P6GQRFh+g-lfWdJD zDDe3~LUqCu;AXOFMHBtPML9bWh_3Dk>3-FjQ?MC@p$D!8d);cq_$0u=9`#MF%*f@$ffq!adp}iolZIguMm&9 z&lkBdGV-}flU>SX<0##zdOH);v9XM5z5IIJ8%h3~F6 z#8fti(Hif%UO~>120yu{)8;iCSvli|n&c&1qikVf`+*JO3Sb1}J3eJLoaI=f$YvvS zQ?m+i6k!Em!(%hzIs$1I&DAq zn`&O{U0EuoFY0HXcq?2uPZ)-h_wfgqJ4-#%-x+G*BXkjCEww`O4-pB|{*C(>k#+7| zY+HtV%d_cJ{y^PmHnZeMuB{U*T~#cV+a04J2uinC1m4rwQ;OIjJMvJx7ukt7oDWR( z6iR1yxzx8Fgv9|dD$B3L$}*&gAL|I5I}6&0xeqol)+X@@%V=Q`jjE3!>18g7Bfc_8 zD$*nN=Ty{MJBcz_ZbC1AtSm-)3q>qwsMWDEo{2XStMBKfjHZ^#I*!|K zV3A3F=lwmd#X;6;agvD;YZF5WH3!`tqieBCWeW8^Pyxo}Onk1o<_E7Q&R-bU%--M0gm{&7T9u zKq~3=IL=;M=4|N^^q=^S^;Nm4IR<{%N$X#1Bu5h(w5pQV9vwnunQ0{InQ<&FPtrjjW`2;XycfdOU);&& zhAtU&$)>KqXA~nf>?+&-s2^4SVN9X(^TF31x(AUyO%3VWD(q5TL&oy+1^1uQwH|4Z(d#qGgin=a ztKN+uS`A+APRQYnFj+HHOU1p=LP-(WKv~@xO z7oCS`bcWXO>yw{qRl3#~fc$FC^olrf^1}327gImdTkmvHeEk{h_c5p+t$Ho^Psw_} z7Z9!AUf-yy25&aPz`H+FNo+auiO{bqXY+k3#|77;Gz&#t;Lpe?3TRsq&fUOd|f)6D1Y6OvvF;*ja5^WB&&w zxUoi()dwozIw%76$Khs91^jt60y@;^CPe@%>B( z1^!fp32{C4em!0vQUI7W?{|$8?#lGZ-l6u9R$;l(x6WY2UQ%&Uh5QW?Ya#B^QawgL zmCf`qQLm_|W!XiV@kTIGfe!8uh5?u_+aI3%2!xaygDHiaN<>S#munt3wqTpS`44PA zS?xd--17s529mdkMXF=(Cy0abL<+v+qj;&ycf5L)DyV!D;it6R`DQHFz=!^^%nKW) zu`F`2s+GL3GOLM^5$Lo!DzD4sTO!->OH7PY&!juGMu=RNBj-GcGMx(_l#1pBGLSIr zMQ{ri_Va*c=h9Qj{68R%ztD4f|I!NU-^ea>*s9;rVP-GXY3sNxT7dY1v6S~3yV3px zTY-1-p>lIfV2w;lF)oTEAUn%-1en${Zqw;R1@mgp!P0S_9sn|F+4ec-cOp^4lypY%kWF9{9s!n!JtfV@G*FTdV6>UY361>1*+hHtgUvGxh?#$< zFf*egL-hmMv7!&tuJ_vRr!AJrT#t-#b z;Zbyq?@bj0FR!vlD00fAKt;-0+K;X&Kh1SXhZAYBxC!K|Q<4Mmwa&^QnIB6>GXGGJ z9kvDGJ{>n0z*1)8dz{@zGrNv9F1<_#m)5rB7GLqf2paAj^eKvE%1~$hk`3nkKH>w9 z1xCB(l8_S^-2_+HNQ8cZdznNaq}lghX1#s|&;k=rxc4Fx8bLssgG2|zP@&p+LYSB; zqIr?xKQ;OScFvf4?WcXb&JP-A=O`fNOC>=3H*tl5&UudnWG_4nZLaN57$7s 
zSAcq9g1EKyU)jY3x%C{Al$ZH)?&jIuW@OHg8N~LVm|ofjy73qM;D8K!0Aeb>AwxCN z3lyUGhEEM_%qBT}XG0nbGxAUjG_2(~79xTVkT*x;!IAXB@F0-xvSO7o0CL9WgF_&C zqaB($|5Mk3YqxvluhN#d67@N-xs%{=vA_e)?KP|exyh0}cySicvv57sE3w1^zx9a($l z_9_``WKq1pH!F79w&x=fN_^g0ekNz~f+ox+5aSl)0j?O6Sn|mqNTIiilKSI`S+qKN z9{qD<^Ot!Zdry=X#3hpku(UH@lL-Paq8FAFkBe9J>u$L@aDc5Q>60^Rl z?ohH*&ilz1(oN5u0g8MWh?F=ABn8x4TvkJkw|`cv2q48J4v>CY22g+)*G-Fl0XYp7 zDckXHbpTmR0ivAbMgtHdBF?ZAmLAsZq#Xe{*ZZg-QY16J!mrATgNxbuKdOWPoy=hi zq$2x!tl>Li5#}YwJj1!4{n8IoApj(#^?maIW3B7l()D*fZ}>}PcVIln5nJhjGW(#^ zK=9cTayh!DdU0r`8cfTdldgFdO)1^T=XpO1&^vo9k(hBjh4D>CdTF0J-X{#Thez_PC>010M#fB0)bVKcENwr_+tR*BOBw<4!sQC(4S2$^C;v|Iz}*Oa55 z$Y-{?xqPgy)B31t4sJ5N(QRF07Um>o@Sy69y#+St(ngE+ z>iVKtukQyF`?pxD_ey%dhaPTA?;maU*9#F5Ti<@Z1LMVdSU*fLqG1pa08{U83E2!fejt>vjh571EwW?)*eVvK!%X{vpJjdyntT3sl z1T3~e=hFe16z_ zeV0I3_mVygxPaU9W;ZAgaz>C}Fn1wNI|p$pFsgS~VyXR=GKH{xcd<$a-9;1`r#YPR z-T3;K*jR9foRx`xbCrr2@?h;3xVG#2qos*~eW^nT;}8dips6b@fwgaU}Um$6AzN|6+A-^ zdzJ=qu+ZBp3?Ei%E;v($#qviy;ROA2wLz>ND9qkG#tC@-XvyrhQ4Ho?V4g&U>6vuI zDC2!$qd_gv5oIRzV2s2MGES5ZO$?oj(!Xilew0mH^u+~%TY$;3ZRh~C)Bd;{s?yWw^{~Ib?;NG< zEMbM{Q*j(rHAM~z+EyKy3U9%SGycv3DW?K;433$rB2u0-bCZfN4mKDez}wVAm>#|71XabAw=3KZ=!Yc=EJzqWB$U8Q6 zN)UVD3yBT>sO`y7f$AepftBRyjJA@U#hjULU9yux+C>eNB_xB@PW^I{rG~1{y zCz1(p5^gRoJz5TuPK-HP32SQ`D)?^cJxgY#lq?=;-Pbf|)bLk9Rns4eo;Rh5Y|3^Q zBinZ0qd75BDTF($Dq~`CP)k}>S6f?IUl@ajkPnq;2F*K`+RR1_MVD0ehq85=0PWQ_ z;;z(X9ceX@4;d(!7_rJM{KqIPGE4u8A9sV9n0z1j~dIxyNnI%TgTBC=@FXh47BD zN58U&Y9+IS*g0lp<8A)?r9otAD9QOT=w~>7)WD8^VGYSl`mZkoBnU`E#%p!)bHxOWY7oE`<6vLSp{+df9mp8x5~EEY$9AVqk~3wr;9i|Jfo&X9oiu_yRyYo!flOb5E^F9D`6D!I z-WNj&T7^o*q)dtc5)TB7scxRwHws}cUJBNniA9E^geHi_hr3z;0Eg3PM4G+rLr;-( z+AsON0x5_q$^V&fLdEx{uIj@E$2xu$bZeL_5AWu8@yV4RxR^VbLhU`m;c8jJ&wb4 zjQp&+W02$9zClT)$~~kRSmpLtNQxov77Ol&ob)`W?v(nX$F}UODXp9}jjDxzDC@SY zNnaTorEKW5xB>p4P$*~Yn7STzm6RhPZ`4=J0B^X&4b4fZ`c?z$e~0nH^t#4>#&its zo4^9HH!t~M^Ps_>TLa?e(9v`2Q}@c81G}8;;~v(?Gwe6ez`?=@14w`kCO^){aazykn*uX;oJH>y+gHnshd){j8MV#Lh145KGyWvmwnTcQt-J!{lm9`A^-#?BHu 
zM%KVdb9owxCL|0dwHM@2aw3JG;K$8xDRCxhCJDhf+(jPWFXU}7sHi-Jh{6&y4wJ-J z7$(C89i=7|=VjyUi`W2?7#bS~eStCK*L4CT1Z~(Cb7bFdWCV_o#!e77MvjoCPnj-m z_>G;!CTXnbQJpYqJO!f$rs9W^efl#Uh`c=7i4z~E&Fbl`>8(X-wqIdWCQ>HQ5-QyI zh0z+J)|7v69XgXxb-`-XnGe%C_FAi&tMx{7YIJJyTGlx-L-IG9A<8`+i4yx}4&D(A z3Iz!XW4IAuX@OD-&7-4=jAuD*{)QTv8mWf$9r1099dpk7XT$lW@0FD(?z+cmE~nc9@!CJCr{(+H>_oqcVGimzx^6%{WF~+ zQ!r_eI8=AEeN-DA*)sN!#eW_IO7-)wG~GXNB{j;v-&+vNCmNWth95KRuhbGwO>Jfw zc~em>!*JoC0h~?I7-H+(&hS?Hi(#P6AzBnAHmHXO>m?WM#pu- za3b%~C&!ptTi{K*AUM$jmyx@T29P~m`&wYFB@PkF^eISxMjz8D-B8uHLNcS$)Rs-k zIS0ue?lO_#*+PUR>2z%z>4fj(qnfXehKdo!gw?Kxx&!U{+E6zLQ*ev-1C^4Na;MeH zjnOMng$yh$DQ6p1%MZ~i$#h0FIIlQksFen{yv_aua`g?(gEH(2qFCzDt{%wKJ>&D<(!rSyJLJa_Z`z1`l)9BN~71TnWi* zc?ArYL+#6F#ypvA@!y<|?D-Ijc^(Q!pIabul2UVx&g{Yvyr8H1S=zpC>MA{|B`JZRgvtX!bJWT=wV`Xcgi~c-%t-}5nAVeg8w<0K{0NW z{>?jgcw_N6d~^1YrF)KI{K_u}fA=iBGk$XFy#qaBO7LCAHT+$XA{N<%a_F+RU!62# zRTNtXc^!fHTbpCpsF+fWdsYApP!|q1-{sDWm#vc)QG%S@J-8oo1Cs-*`iw!g|v zH+sJ-N~qFJS7cv3R#ah;M-K?&61DA}li!ZVwj?A78#*PkbfEU98b zV2Lbm!<2;+Q~~|rPj>(dm01M0rC%F&w&+D-#Uk0VHM;4!7~NMn|aZkPzf5BY$ zKX|td;&+RMwF$LjK=~#F%j_OGt%w%SnK`?!EH!jm`7Lq7-qVkOcM8%-5E* z#`YF2m{X;&qE&GHWH%jEOTQX_RWe)@JvH)KX2(=eN$qlCLZ0|M?%q6!uDe{-GQtl0 z$5<{%|8pSzLvQ~-rvA#XRqIe4f#h=ce)mG>EMjjZioW|E z-SazJOhqdZ3?NT+JrUD`Y%UcLZkG@O0$#ggDAphG%^6w^{?P020z;Ygyb)&Su>VVT zOi((E_yf#OaH*YZ4Bx_m>~IwEsBKmdkXj_5gt3G6Pwhf|MOeL!^e1%45D*Vq{MV1b zpQ!9pAwbk#tr8%bA4Y(jKF$vbL=s%mN+uLfc-RMwr$gL@J71U{OPeuDR6X}JY6Q5CetN@N_}Q)xKD089vx0zoR#sxZr;lJLlC zYfx%z#v=m@5UfA`aODHh3iCphqcLHT2M~|z5!j^eKoA(ZnuUanCH-?$5wR^yj6{^u z_GAHv%of#glE=*Lywqu8CO#s|tSO^pcjdVIz-aEwqCzUcR9NZ#!Rq10grdv{gef3= zU?iZ(IZ)cd<+J&Ln7N31f)L=6e>Up_@2Dc68WjwJa}k9UPLtJ;sOs|rv!%h8vI)^| z1rf)SV5QadDpglig@Qkg11?8j+4a$+(vJZuC7B_s+8(8wet+QSbV2WHF5s|S~t6Pxq%*Ylg#FOg7` zmN#6r*qj_AL0F13CJkG=@6p9iwXlC_t$pNOZh)@p?shR>SK+5fo9L{J8pUW61-Ikt zEpfMsQ-jvgJt!xm2FgPUQ80?QY<8|%304@4~MDE@stO!jZ^vzCrpztw9PbRLu)(g5vzQG>i6c)6>F|TPW!f*=g=EC3 z`uF?ejOz`&>R%|JoJLtn+T*;p;BQ~^pkJSuz=hD?5P`jNBoj;|H|rdXC607h5(XkM 
zPHE{uf;*(bZT8hW50KVA+g=gE^Bg;(rmWh(DaCK#8~VCLkOBTV@A@?G;mcVN;Ap*mz&e7n{YK8c%5{UIi)!`ba<~BaW0j( z<%8Lx5x6ENOvHn!fBt!5W<|roorU1i!#eN3VKv~PJ%PG}2X)K)6!&*7J!j9Cx4zmw z2#cm9-(H1TE2zd|3$M2L@baN4aAG|aVHB`$xfQUa<^xJ63nmI%3@1(^Qh%yh{RTMM z+%HLaZX~ClMa2F{e91C@;ZJt=2e#iW zoDLJ7lqswYMcykZyB?CwjOTU}lkn%xsQ zF8cHPI$RBRYBC!Xqpuqf5aBf+HySX=4f`L@WnL=}s&hE{{+$b}6p?uCZ0*}J&~JBWE0YVKSEJ*j`Tz(Jth2{}2eZ@=?*P(JxIB5bO=5?b(~ zX=Z{J`nP%KK4+1TK|fL77W7Tj*nCu)?M`afU#TfCVb)c6ZW2z_1fRuTWj;sL6e6#w z%Xv{AcsZ@S{2ps!dFQY!-fUUH5{~`Q;9;~xsoqqU(`27nxqlV#(BavE)!1Uvmhu0! z16%5zgV|*LH7%RfV-)X&Uk-~kqqHUF%=ztMZ+;7)7R`fzjzfG<%0B;c$Z{tH^+8A$ z8$ITG?dn2v7Ii&eIYV7jD1UHU9MS0EiM%)NX?_zeRGK^ZR+hcfgMZ`9#1~7uNjnB* z`YVpN1zwd8{$~tGg?U@(s1wpQ=&T%j7c8XiwjE<24Lj{OVmL57VfsEx7p4&o3GfUi zW&el(KHhA-H>|c`AVt7C6Jyl>{e+O!0k-QeBHSmFeOO!olQ%5y8z$%B2tvT)hyWG3 z%+x|Jf&2`(ZSI(%Z|VekCwAO1?eNF=deYc{og6CX8yKv#3Q376(rJE>J$5S(!Yk7|`FW7v23C z3&mU}L|<%7Z*s!qIHl3#&mQlwJOQF*L-~FV+#Q2|YR|k3M6988E3 z4<=^oXU6G<#*Ic|38J}K->nC06Z6Xiv<9~0HuvCW>Cnc z%w)sgbLcwR$N5qYvR^+Qx>`+i)D$lW=HQ1K-9oGc&0jIViDEN`{13vCWsIDf~7BJH;8+ReUh-h`f{1>zn7~6d^tn%o6P)T9YwP zXrw(qKTMtl8q1vl;u>FzXbK$4%#^5Zwa=G-x=|PRdf9x$6!h{kfJycwikQ4Tz&MEU z7%OdmQt&Qj%M?#R{U*~4)h=-+00&cdC3P4X>&bBv7qt6ss=5#ScUc!Q8_&02s|4EK za*!qpU@2H&6xXx_FQj5u%^lDn25-DuHiMA+Z8sJ zG*&5+sSN?C=A5|+CJsHks|1N4v5bTpXW*uwjpDf6v+daS(Tn+h00^U$-&0kQ3k=Jt z{~#2UIPd6~ApH1C0x&WbeD8e#xrsI+hCTM(ZZMSSFV5XQul^8XJ`;9eq(%u=VJ325 z!X|$xeGNVi_h4pfz+fh*@}E6%2>IWVyaoT(1rRF_5`qtqGNhNqisvJ$nedY)4;4L% z`x?%fkjih$Z2eGpJ+E@4vTS#4cMD~@ov7xf9*;GRYTaUkrhZTVt_{_=!4=DZOM_F3 zsomy+XTa3K+`|583BXll?9g{!BCPDk3}hO-$~i#PyfcJ?t8c*&Wr(acMzvSy6$Pnq z`y1rb!MC;QdBUSSQ*E}ae&ZbmE#^H(mJWa)+XP*Ks>PsnAd62Dum#t~xWkdQX3d$# zW-9bvvWw=9S6^!(uOPRev9z`hRl4~AM}}(=7ryFG_*tu_l+7x24NOcFHRv<7SvBMz z7K#sm^eX%VIABzew;2^ zFHlVjxDpVs=TPIqsilvm#}sBr?V>F7zc_oE%mmo};8^d_>X51=3bBd`rsJoSb&}i7 z&l&0IGMn9rjU1FOEf1KD$Zt{CFpG#>%ZCo&51fI4{^xbX7!g7*ATTQDAb45@8MfEI zLuTS1xi!a~I`v#d0G@fBJ# zb^I>o`epf6duW{-`?*l_?Nyr9)|6BucJi*qNw@L3VOIIkJ%F`;{PHX2T+z`N8w}aV 
z8y{}LO(gbi=n3Z2;KJM9iprHR=jJ3F-i;w)d_Q6d}?B0W;kOz0}_{O#+yk3i`^+5Q9!=DkkElbVTT< zCfK$(GUmNAijzU_ch7256?+_OTQNfqe11^nqB!d0oxrA%lFPGBv{?p>&Whcq$<0fDm5_ynvA=;e& zf&?CKc6QY7KA&Y>IG)I6jC0I3gDu`}I%Sz+sHnR0n`D+F3xH>|f{p{_djU z?2rza^wM{RDb^iTFREwE>wiNJ2q76LyK8D9Q%*XPgFe_|K|%SEreC0$&ts zj&lHQT;)7<9x0vtH6nfH*DCEdMg578Irkkb_$h~s7|B2g-&EzFg+j(py3ZV+E=J}w zrk%*yuj+Ne;=r&*%T(*eAM_;TQ-Iv|lW%59vra8P5-JfgX%Z>eew|E+r!Fq+Hff^% z>2uIYS19Xk)Ij?!&aFV9d6Cm;-=RC?c|sq#zi(W-9=NjWf`f6=dgw{kB&izFS-vfN z!J@NNY1u*-tXxU0(#}Q)!%VAMWg-Yxt*Rm4vfkWo`50BTq!{xV?fY9xTia66k&nv? zoVt_7B!2PCN9#ilDvrHIccKOUp-8JTJfPMDMg5elFWK*0h)0yn^d7oZLxQLAhXqNFgJbhBZe%rSs-=e<)6D-?;#X z^k?PXAlGQ801WI~cOJO)ho@Ii|E!`&@y`$2A|L{EMi-FdT~-0GH#!D%%qVXa&2N9| zvT<@Xcb1JcvVydaN0FeMA<(lDhbPzVb*{c;ciXq>|T@Wv`&G(&_I| z8KcEA1zRPJbo7C0fp1Ht##Znk6gG&D*^Mw23!YixLVaS-Iak%<1JB=;GokFR%GY@N zu1`(FjHn0osmZnB=X!LDZvJKl&tWXo>?p$j%A#FAFQZ>ofqKwR6W^mHEN8&fsd#|8 z=UrCs>J)}eUOeoT`A5Gy3)alU4rrQ|^4izK!aE5{svvZ+^*ZYxwu-p{wE;slm^90Z z1B)&KQ+VM0WEiA_vV?@0lSTb+#lTdl4jk#an)$H^V2QxN0om_$Nbwr_(t7!|f8jj6 zR;Odf=uf#XnU{%g67x7~$ku={LZX7SHGAWD<+ekg7 z#;q5t%~N2+rE9BOD(G(QHD#GbJ@n;4tdG`etTs&-iSRnSs)}Dl+s%s@Rqw$hed|p* z-~$}g zkcaQ4F`5P)HR29*g-STni6CzUc?LzL1jlQI1anqqGI3g_YamC(nNO+$i(J=kv8u*e zd>(1?nMQ3v!`dNBc^6zb)%_~wp3Yyypuq>WlhSUxRK6 z8h*`T) zv&^Std|Id#6N zifcel|5DSieE(lZ;nHhS>Dy@6$uJKZ$*NQTZ6JY##S8!BVUX(uON`llbOE%{q)cC4 z|Fza9bm~70-aR1J(T#GM(6jH0zgom646NRXOViTt)xDJA(l`zQ8kMd<%MIsxaPMJd z3aMI`c(w-5b?lh3;>5NxqLb`SN<=zc5K=K{UxR5VzTuE354cp{X!-em->3p-F~YvW zwR15{ca^J9ssB1c4Z52Hg0R%7u8ylFR*ReM(!7Isc$_K#J^(!svP{;^M&?hpVDR1I zpLR25%=t~RIBhStDJT~PYqkfOfH1_MTwhAs%ZNa7>J!K-eua%E(4#LNnOnq|Kkm#^ zlV04A$V)M}2mz8GO}DcSgYQt5roM6wu^!^v%y00k9m+tOfdxzeL9<4kLBx}X1jCA7 zC{_HF#u8HWj$y2!HINmvOthDVmN$C^-3Z)s_HKv;l;zS;g?FIr5lS)ICVJhmnqCp}Y6Gsy z&A3K(?x=!Hfs)a4X$$dk2hjvrY(fED<|7kp1`7RpW#+PkF#y!`{_$7Y;lhr;O~3TG zwGlTvN*S2g@$|}^x^(LBhZ|S{nmpz42SCiUf_j-s5c8X1^Y(&p3d6GD z5OX;`bEoAs2iO_yV>FaO=y#q>A|Ym>3`LZ2Kj6Ep48ka|3D#{*k(Njm5L}y-TQ32y 
zAXVIk>A+AGV`AhwI1;S}FE0IppV4q3pz&E;sj;7;*8uKbbSrbo?w1ti$&qF%%;Rbt@`FrZ`~F`CQ#=B zFnL)=4NXKjlJr%sY)9n6d?EQHBF=^WnvQxbVU*+I5I^ABKDY4;jn#<8c<77(z7{em zjK9tDl#U~wW^X+1H{P+4bcBobY6LFlzCxOTcc+b-25Z*~xVv%q`&o?Ddx+{jxveIMJ}=3lr8G zH^3Y$TH+ro!ixE)rKRRAUdDxK;7gE5zD2q3R)#g-8BPa=*zMy0Jjhb8f)SBY0-Ibw zMtGTDT!et;fCw=d;=aw-Lm8u3^CJ^9Ay(W1u`l@}p$-zpjvhI{RrP*kd_z;82j>q( zkSTF4e?n~ll8q##*ZM$2bFMD?H_-9F{ymX4?9;rT!gBL#frh3UP>E&;IX~EMpqhkg1nm zmQL=KKmpd)N8nVe1iXbz&7=(DAk5&MM1c67KH+|^FIDL{DQFPHg_GjBBZ0y>{xG+S(5Px{i&4^Lr)%U|kTWja5@1!W zCY?^@d2cJNl1xiVXkaR;G%9B1ma*Z@<*mP=YV-J!3F@z zx0ZnF+hC*3TU`xnNxB3*3|z4BB&%iZh6~O%DKHK(tBZBft;{_a%B96KI%MG)g%_?8 z;~Syp`Erf&#n;MP0g23|B32V`!PamSYMEyb#G>KFX3}&+2PpQM7AuqTR!ato=M^Kt z6W2;}gd)8MteK&$%>O5(4O{X(z=P?tjnaAnkEy>40g@tXzsCgxniCl|_3&4Zh;CZI z)~l_6$FW0N9}g%HoQ$ViegH_BK@hrT1K6ec!5!R#oALpM!>NsAVXJ4WuP5X6G_kp| zxYkYvLiJqB)H{K^kf+SNf>u*ROIS}C_0k=+F{=DI8HwMN=Ic>^33MmmIyk>zH&G@L zx%|vSt-E$IG)1IVZ(wyPfn@xP^UF9cV-wRiI?gCM zeb5}1%A@i04_44K@CRUoYRL%q;&q0enHH41^j?#p=onIHjGj%ACv64RsY*wE%DwJP zw&@6yw%?p^Jpgt1D*#`550o*NpSY3GGr)-eBc$;@-^;r?*F09I^VwPF_gB{1+Dz{{ zp;O%}dUlJp>*m;vKFn&E^SDz0%xWM+cg3`Pc*j6Czw`|$7)Jz=jRA7PNU;=iCWP;| zE_OYQ3cM}j_YdR4nG;F@YQHm znGJ`{@M~;bY?fCR(jRcZC>kIE1LGFv%vn%7&QHh1=;UVnD^iz-%=s$9Sagm6Tl;tf z{a%bVu4^<%Lu#ynp9WLP^ZGQu=;NjY#JwM@;{pfVC_*JKXQZKKXW+&z(CQSd;YuQx zan3r44SUTm8itCTS*xO4efB}JNqZ?5lZ+g%I9!Yypek#;5xA#c;tM4m&(04Tg+h*| zXRqNCf<^H9OI~AeJf>DgElrj_EtpIGxbxB(m}UOkWX-;{ny4(;S#?R)=4wmQZ!heD zRhE=RuFYDEQFf7?7@ks^%jp6Pd>tr?xCu(Ianm1w0kqXTP{08OX2Lj!sun#^&CCtJ zFbQW+4eDX}#sdHwLL!~YIe^CG&8EmTCuwDnGU!V9|;(Zu-b{*nrihZ}joR zTbP@v65D=x04QN=J^JeQi>BJ1w6Jst5$m53@sPDpGM#z7K`0C9_u zQkGn*j*^xdl$g`X1!53Z!@9jMai3RfAq!PC>2ls-&CvxlNl$fIFK?E%Y4&%+xOP*e zNTQ36xnG9soC+ZTfA^xRPRf1r?ui@n3w8+Hzz!q#2QbJeG$6nMGIfC~19Y`XociU3 ziBx2;8M}yhOOqp0=3fblJAf-=m)}o^+XIM2#|x>8%4SC}d2B;9O*UB<*ovt(PIJC+ zq*F&Qqb_Le0Dk-(sH?+T1ZoF>3j>j@!y|xYD0MYMFJ)kOOB0i90;ubWtUbU$DFd4r zEP0ahqdVC3SLyQEsjS0EZAvs9rtOEHAoT=nvzez1Xc918TtQI;+&F+zuoSc&YEV+o 
zMJp@&ZD=V{k_e#7_1jiCg<&P?ffpvP&l&*v00CaW7|rRpywTgt0RmPW3dq<(wgH() zGZ{c0I=29}W8^l<4#1;LOjB?T2LJ|GVGxtb#X^X00LmTGCO*;t6mVAo;DyNclZQPs zF3SVVQh?o0XMgAYa$3S3$3T)*r-vfwEoo~RXqK5eo_?17}Ceg$Pps02*2)E&3*Sjyz(EQVD!C&dkXJWLr0o0)# zrL3tJvjGr$U;)Ux;1O}ihpwF`P-p?@c!no;84tj29LC5KFs)LqR6S>yu@E=_;v>*i zqAHI~D9s~vHG_@?4|%Fj!jE90pd$QrN{SD*~&SP>_#c!1s_15CIXFx2px zDoqBs+XD-L<<7&7slse$1BCCL$Xfsh!V+0_0GZnfE(&~OmQD&*1~`(K5{Aaiqm~%+ z9^PI~dfc^+c!1a1zbCeJIIdslcoA|(YOxz4Hugi6jeQxpH*>&fe(sVSD9N>vGjV4( zMr_)67b5Z3GO?Fc9?vpz%(hG1aB}<10TVlbqNL7-$Iz!H%P%>jqFh6ABPfj>8QpC) zQjG19>3bIKMD=V+w)*xWJ^yK$64Ex-);RwQI@byz`*Lc4i9&6eSObK?O(wMO)^pmd zffx>LSU!EBu1y_L(S2Lk<^F%M)olaH@vH$Xzugyf_M%{1B1kluD*3-0cUG(H1u?@W z6A93_W%+FgfvS!~0L)d*K&W&8%-bXX8`wL9TJ)?ey{M@fFvXQ128I%4HEZxTF~aV1 z!jvtkQI1>;ys*TO=duQ9I5Mi9nF9b|>MljjZ8{RU--RghE8qOGCwUy8ZX{?_{by3l zs9pqlpI`2%n4thqE`GwT7SmPpEp!}5TD5$(Ndmj#e)FB*J5z|{goEUU2b;=MO2qbeH= zF_;SVw>Bl)R&Y60d%z#P{aT&211%y7CZZ>8XGJ=$XQT>khtAk;7f3D7cCe^$?^8az z8;2auM&b_1{l{#^*&JiSuZ^>dt%z$Y6s7}Jg`hEbgUPJI0#GmE^agRoz={oqF^GO` zxLsbnxhdjm!VrujpZ~phP67Rh?cvRz>{FEYgnT618FbJm?@)fZ{fo_9oc3mC( zg}!tRkp#T`o3Cb*!>3xL11Y^ zB!>AQchFB?h|dg>+b14$GRO;<)COW=b`i|d@#Ms{CaLP&#k?3q`+#>MWWaAi+|p&C zuta)LU&h<<4JQ2e_p>A0Rw3mEGXpTtM+`9)a-t$fY|hFze558M7b2i=<=8ksAdHMp z?zlsKx!0xT_FxI~f_C^sl-a>U{=_~Y?&IVLJzUYsj?9)>BgqGNPRVy+ zG!Hteh#n90ofILqL-jq}hB@`tWvI>fas*qM=U}_Pp@#C0#^^bK6Q7JQFU!F^@c{n; zXrouD2XJQ6z4hq2ZvY03<=nwdnKUlry9|LG;sNMNX@KR}G(>ba00jva01hgT&3Yqw zI~x$3Dn%~fz%4D&6AvJ6WG@dK`z?QkbAV-Vq5!vVKR3&mhYP^dTmlq`wT-bE8vwlz zDwT=2gyR92G|on>IetKul3)S&Gx4xel-^|+i60iYsh1SCy5E43MIHP<;4@F0Nh)wJ=$iWglz`5BXDf9IJfP}5}=+)Z? 
znrbGobO;UZY5S1ngStg*r0v_0oa}+%LR(?7p3&WM=q&9^!v~aX?WJIH12)+`AZ2N&+b>MO(5Pm&mh8`^V&jam9jEf zaczMS>bAZqxYlO1Jn1)WpYni_7yt(qL9j-g61FC{4jko1i3*GpkHOja@`STd|1?_u zkZ@=;)7c!*K=QY-QUhe}ruOqk)hm`q3tc0Kj44Nvnl!^npII60@N#;QV(B&$OEYng zi-o1JXx-m;P8&QdL%0p=SXuOO2XZV8xmN#1l_LE|^@6bUMVTFzDy7p~T@jMWVn7VV evuJe{>;WT5C8z50gfj4eoe$l8Ukc-E%9 zv-fY_>s3spDsbvBjB6ankvaRVawS^W)ID*TzvDFb98RwyjN1R*-93?p(&rM9tK8ek zlR7A>ihsHuLQ+%%nFG25tR6Q_3$47ndQ{RwglL*ZveE9?mwjE%)1zM7&g0e4G~L&o zPAAcH&+Tv5ahC(>96@>nrJ!lL-LWrsJ8!4QRF=kKv6*?!&R9v)JRN^~UB|vUiKchB zw#Uorbi12QI?Yr?&GL-J%yJVIS-=Crl;~-y={~p1YKkjMe%fu?Aw&M*RkB2sg26*ad4kYtrulRp>o)4-n+9$uf3 zFINdWOIYc0xjLTkm4+xOCDF7Vdy)InbkEs%E?3m*Q4?u* zxlymIrs=NZtxgg`)4esfY%-_icv8`fZmbM^$ph%NaQ{OO)KR5=Y8#CvKxMuA(YxZkIV@mpe7L%OtYP5%D+I zS0WdWinER&Tea3o+SCYoJSt9xbE_FdXB~EyuJRyh+wJf(KOwA&j{iI59oQK!Hyjs z{J!5>-gcv%%Y*eCkH=xU-3@;z{vU?_&p=R+AT=`Z&%*zk|9|5D!FRb-Z~^&o-T04x z@M*G)E7yCGBb3G1J_xQayLM&oqXXt`jx)-g~R6D@|j2 z@9(+%)>pPizD2ttFMEW1dl3sR=aId)Wp_Lyw_fB%nnpyhKg0qjKcwVeh5uKfCP~cy z$MhsX8DwhUBn1Z?qXegNz>gm%q^UW<5E9^mSKu4`pe)75gb5E)u*NO$SYw z{UpsT>o&8SWqz=e4vYy4tT%?_ptzi#0xi2;J{B57QGi- zAU1S_4_<4M+>xGw z5EQVv8DR%I3ZS0nhN)?~Pneb-2nE3aKv6@OMsVSsC2H;sOVe|o+g0A&8*Ia7X(~%+ z>bX;Nr)WA;_fCiC-rI84!zdv`7mO=arZU*6rD9+k9dq-h%I#^!cFFr8FHgvGVHn;wS*4NPH;1XtRCAxFYHoAyy@_BoN=jNuX}@jJ z?nW+G)E;r29vYQoWz3}Mp_SD`Ni;pg>h#c9()3Vob$7QOq$_ImP?DzU-pk(}k(L*^ z!C$`W@$5P_$c@FZ>rSUfSq!D=j)d*$Q7MV0d%Y;fg;Fy$HBA?JIZr3i^ty7LRHLM% zW3pS!l-*J@Ejt-A*BPmvGn7Qp@<#GiRc~dgYMNeqv`4QVb&ww(K!9ra03kOFg`G3H z5#B@cH`j5vW&j~;=-#qK+O9_>EsLQql8qy~ptp$k7X9c-P1C)eV|D0?+8gV>{m0s} zIU7Azk4V@YXWb4&({!#I^`6|W7rC~A2$XwP5Z}aOlAMEcIK5W6Bx@=$DEs7_4C40a zjd`tbOC{EbVzpQ|S%qm*D`-Mc=Vw}$>vro6jtZx5^R`{rbDajnZ;#ETYCSU;#5k)R z4Nnzbk?WK^qk(lsxR`^r67v;DS5mGmDB21)Ycw6g2;>p4VA>fI2JuGq^o^V?r=}oLdwN#3$S!`DlJdhQ|NQ3f!8x0hk}$fLp)^z>in|DIPOl zs>T1z6&sqEFb!Ozxp{#FIv@eYyiokBT!?@IS?~xrk6T`qT7V9on7}l0_2d-cB?KbC zBYxq>|DS)AoB3kig#Vc)S^xtS41|4fs5p7j1d0hNM!m8t5OR2Ka?xq*By?fghCke;BV~n(<*^$_Bt-8zungMFSSV 
z0mx+r7I*<}fk*g{IRXVl^k6js4X6MDPik5a!w86w*|DKA0tf71Wrf+lkWcz=<%J)B zPiXT(JU9=8r~C(ELjX!C8LUwW-H**tX^$yQC;k7A7H|oQ9C#r?4*079PBp=gzonva zfe;Mf0q0;G{FtOMqlW(v3X~>kHA#+K%z&VZ3^2epFpX~q|2fq@ng5umNfHCY#lK30 zB?$%;foWhG`0)qhV{Dq3GG&S1Hvcm-Pa>d!B)A100YCmB9vA{;42_j$IRxG49@Acq z zO|#r-%rv(YR~A!dZ_5(lZ_9gbx3lGPMLxZjhVH$oL}TOOB(U}zCPEBULESHRFBz7MWJL=0Z5Tzla~%w+?- z@N%W;waT>@Gl)Dgju$hC55qN*TzRc>O>Yr(WE(HNMSL1~rw15bt6bAd^b+w!yfZ`2 z*hk50l}#fsG>yQpR)Q6Ag)CHp74eNQ0I1}Qf8gM?$~D&&v4wm99b|q*0<4Ma&g_e`}G>ifh zIpdzF*DBXqOhlm^Bx^C*^5q8-en5dKXWXNO1STXf5&ye_X)fdt_b|OyxyFJLKPUNX zl5buts2L7&#ywJ4OyW+iv6yB;lL$<*EjeQ!IAo{{C1c7N`-qa6GLtNZoUsok7z#thqH@MON?;}#W^y7T=P4(~ zL(Z7Thn$=!QfzP%cXBNRr`S@?m?wr1oP^*+{gN~0Sz6Jw;s-D*R;X3Ds8FFc=|P2x zB`{W+#GPDg(((e%63)^MIpZElnwnB5PbrpODVD;xps9(=4mV3TOIT=VYC>no8S{8z zvjpOn%@Poznq;e{vrEo+$CuU8jcm1a_K-8)p#?<_C~`QbCa20)>KGmOX?>*(@Q<=PCBn9I3!KdLx%O2N zp9UUpP~x=;g`KE#r{WX^8CA#(k;q7rBptIAKwwBHE{$cuD2cTP6Tm};!x7;?7>VIv z5W*;gVIT%U7=&REV+cZsD8&>rQ+n6(>q|WTpaFIsqY zH(dzo8}NgYtBOo&ZacUdAnp0g&_x)Sw%95x#NMG+gGM+n5lBI;C_E6mfDcIG&im?D z8$=-1T*6R&x_K^|BPQ_{!z7xvbp?0eNJ>rL3FA!-$g?KP3l0(^akS}~a`C^Gq|J$H zMCCq>PE?whCn?sXT<=K;R3pP0QUhRn)wyhdf005<|KTr$G=PB9?!QepLmD3d0$2~=p6%6FHeifPHsGW`fJDZ~7C?Zx z9B2^V~>~yN8k5xEtj700e;bfXsKmTrmr!z(&^*2Q=8A3bkv4oSIxNeAm-i zz2~C%jA`Z9oNygQCj_PEmLLp)+bd4UVH{Td%88U=U7;dvWhAh84-pWnl~Ef81AOQK}32vaf~6$ILnce)gEE%~Fntzz@Y)jfS}6iK@)a`U z7oyfbMCBp8jLdmU833-!(4p;PU0-dl2j)ecNH6LUKi~w_&`p%ZsgmMOxbArySM2DP zq?~to$E52LDyXnv(|0~YfboI0k-d-@#M}+g`solRA9?=`qmgcCsv<7@AcO#GMXrVf zsw!z<#Vmc09T6CV7zFyp4kE1Vmt>e>6(J)O4}p7^A9Jz%Mu(IA&;BsN97}fweZff_ z=qYai@bxMJ8vTde&Pw0+A0PEs(WNbbmHmWJ7HtV`Q8|K#86DQ8E~$kP504nE5u>4a*2b3XqmVsvG0tFTgyCWfF>o zp8JRXC9egDO6O!wZGeAPfX+5FTqV?~i%XL^@_K%_gpGXYN(ywSD-QW*SBQ&;Mq3IK zKO0URNCG3hV2MjK3h^xCCYxS{t^&#|F|TP-ckbgIJkO0Gv)PG#RGol$28 z@xWJ)LTDT26&n?mmX_&4o7iBg8k}-UA$@;f{Arrf-?D)i+&)8{KcZL>xy6?u*77@5 zQ(~fSM7058saPPIImq-(#^ICI7aup`33akARvqx3I=MR#oUou~6fa5t-eEb`fcxgL z((Fd_*c#7kgBM6_LF!+EcwuA-AO+#79^KVUU4ZA~OKcSU|=A{qD_t(<3#C>5K(t`d$2 
z0h;5vPL-3g#cBg(ji`jyLMSznuIZF{e=06H)3vNu#n$RIDKnwoIPi$_!viA7*e_sY z4EWSAaFbXKHR$nx0PJn+AN}p%k|hlZc6a5jOs2mT3TE596jA zmL~yVGx=E?n3h#ibzIpr1N?YMGr?L4T@REYu|*Zzij@XtSno?~N0QiNWMGfFlW6p| zJ+ThkxL*1rGZi=F)AO~F$7e`Ij1QoJBY(!6VElWUc5Ez&j+4+7aMu(u9_1>A_r-g^ zp4;~UCA905%KxR$o8wGMQ%tVHQ6||NQ-APYcZx?2|5i_@gry>MBl0A$?bzqRK)l8{E+*ouEO!kz`=?{ z4S~V46|VJ>t`#T=e2}za--mdHD=qeda!8YCSUDH}=84G0Lr~SJR2G6yLh}HY@e~`I zshYIV(QAAWEstTP$pipuQsIKt)Qi(4qchQHm1CaMCVw_t-h@k(=n$^EbHg1qjlGeZ zc z1kOKBsy{lLTL1u=JR$ta1qRDuF3CI0Sc>&BwST2KP73zvCw@TC=*S^@I8U#C zMvlZ~jXEQB2sfekYlhiR+LYbkj7FKkRyGA^P=9h4U-uuVa=p{E*dt9Rb1TFJR08wq zbnuzRO#oqxG^L_gwgrMyCx~yirkdh;TOZGLnNYWNczE^p9LJ}o805Jk=U*5M&^MGd zf=;YF_gHxKtF3NdOM@3UEz#6%s{2?C70Kaqkoc34x`SbEnK4FLTnXu<0m5`T;1*mz zaB36svodykFUpv!A^sv6Albs}gm7|}*(d!%(C3n#(+?qnpz&d%Ee`sIW zN5x>grZzG+^zoTN{WYgE{8cfema7wG!Q8JL?&(6(f(a$(72d)+s`1M#s~Fzy7<*uj z3&(b|9V>jJ0vDYy%1o2yki_TBQVF7p8dg9pC-cx68Q|Ai9c&Vv9$5V}rn!yl}vPDLeE#V67P)M3O=i(n^bNJWg|wl+NHyo2PQ^gs<-_ zeBf3g)M=PJeqtxQfmi$yegzn-ze8~hS;`IU`H3hKPP#LTzl6c*6xqC`BQu29_J9lG z)S*gHQ!V;`mC zO7O%tU;xwswEOBsQUEw)dlE=Xr97HCg;`0j=oZ6Hw=0fQTY8m)qXqjmpojkwX23TM zXNL^f(SfsgW-qkoIe-m@^&;TA0-_a+N!79qrYrUUY>+K9@B@qz0oV@!5d*;V@h)5m zZ@q>*08=}_prAjGmN4CzqP*R@0SR>foee-XWdq8^%7~FqHHZD8N;2rP#;3;t5UBkC zbfn#u{o(+Pf&lXkz?+v|#m!~MBJMXw~Lp3%CZgfb<)O7=Ny#Pu|R&26f*mwQcKC_EnPT#yYnMY0mG zqr4I(^$tG^Em8=>G9&7!j^faAuV?9R!qwF*2~9mt0{|2WA--5{E&6{27Zs=Jtcn4^q#{3;n>Uph)r}1~cdmt+8T+B$pcJ0TDIe z__mT>AmE9|fI%|@Vga+kY}Lf zpq*j|S7U@1xzP>qY@YzHgMCfsb$i8E^c)u_rhC4}Q|kracw;WT-DKpF$kL%sfP zk{0TnbpYAM0Jet6WdaD1(`_<{25{^)KyeIMHXOhP1#ln#mt_Do{!68~!E?C>tVnxs z|IDhu?|?Ec2AqhJ^#la~gX*@R*)391o&d11n45gaE|89^pgWwZlVH2R)h1T}hSGQE zBR>J(&;ei+@m*M(QCr6;fK~mfl18650D}w|O_T`gIRMlE4J48#VC<1gYZ`F>1<;CT z5y)nsx>?Vow^Bn~stAxPfI_J6F40{T2M{z)&Mm%J1L$7VqxuHCA&Q&gL^nk_WwvsC z)!Mx)zpOs^X05UG%Z;9}%@0%a7$i*akT4_yK(C z12kOc84nQ2fVe)e0G0?DT_g?E0@meXGk^fsfdC-*OgSf$9x|(`^~>C!p?N0Y*1&mk b1APL!`7WIg58JHpwQIAWC(or3b@MN}Z>e1^ literal 0 HcmV?d00001 diff --git 
a/kernel/tests/data/v2-classic-checkpoint-json.tar.zst b/kernel/tests/data/v2-classic-checkpoint-json.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..c695339cd90c35be86737430bdf8460e1a3f0ea6 GIT binary patch literal 8448 zcmV+bA^+YewJ-euSiJ`US~g)tT_Dau;~bUMY)ry&qa!(C*p8ZW_g;JRI@1&>x2KMv zU0`K}w$b`~D6!*p7oD1sJhM7R8^yx$o`CsY7-!DO3(o1$50I+SilBAPLkLFC4wMRq%1TWadfWyqb@d6VVSyX`yGe)5TSSf-bVbOpu#dZmt8YB@}8VhdN zaf&su0wC-~16mZ&)B{qNBrLGN1Rqr}V5Vt`-Zr{rr_J0pBijj{6FfX8cy@NL8=agJ zt0z)ZQqX-f_lA<0k(nt{WJcz-d)>G)%Qe%=tC=hliz(s)I^5tx8&%g7D_G%&ilY0b zs{5ASPT%UHr>fR)Y?`a((l)KFEbpwbZkysx-2TBS6R}N{*So1|jx~KOT4Q@CuePdd zT0_+~S7UFuG|io8&0HZVg(ns(DT$R?F}v03SFf8@UfpC;#spYofP@E~=(^JtHMhD= z7;Ac-=a!vE!JiQu|J>Hd$OZ$46n^-jnyGn1Z{}(NIp9nS~NW)mC-5-zr z{g7Zy(?@XdN7L=Ab=n28uvYS+)5P}YzCKOUaSv%26SYpeLL#wN>OrzG+p@6>vT@1F zd-fPC&+_kHWszScd72-+eMb`3dSw;H^4P14!D29)&+R*sqYumT=q%6$(jyuL8L~p6 zO%M$>dd~Ta`0Hb^oF6ti&WnW6yhzxn1H-x5y>4tW%QaJzS2LN$gP5H27_;U4@|I_( z@mP%yNGu@AhrGzMn?yi5ws}j@Ei>2KdYaqhnfuJ~Zl{^HyqdXhyqe2H(^`t2yJn_5 z*JV-F$QDc@0eL)AH8nSSTUB(+cADPy-o0VZ7Q09&3|XO#h)~GFMBCoGSM%jsbEh|S zMbWdi-dMKgZZxgAJh!d&*7UZfzFw^s4_RSt$m?^5LLc|{`g~%d8{3R*sw|`C-h0CU zLW%>1j%Yy$I^=Pc-5$)sZc=b4w7EZ_khKAWJzv%2oS)>JV<6(e11N@^QOG%;Ip>+2 z^O$9_5@b7a$a?GuIgi=(GJF4A7l;t{jcqXd#zdCq68FwSHYOr%SyokZpIc_$_Ey(Z zT~kwrrsv*!@7qnLs_Wi6!$IN+wHSVb!XP~tNUzp=iA46-tDQ+kwy_rp8;@j=u<=F0 zMj9Je&N+tlI+I2cHa-a(TgY&cu(9>9(OrN`)eF&aMnqW9PMG2`E;4<}PH6510WA4#+vX$VPI2 z@yhZf2PDpO1F*&vC+F7qMFtqaxXfeYzR9^Yb|C_W6!;2Hk#h{`dU-4+20jGAgzq>` zdfwL4Jk>Pa8{KOKkiY?zCX%pWga{xjnx1>xQuAi^hCSQ1nYneVxpi-vn|Z^bxSq?p zYi;ivuh#RmX>F@5ueN)=oO$jPCC0>ehz+06FgP6A*tl(O6uq6>+&ibHOG`V3k%BaS zKO{IE!UA;>ZE2f2C7mvMfx3vM>U0sY9R_|Im*qCiK4R@v{y458m zohG_SKemT@s%P7>T8U<9Nwe#j!{vJF>T}f^54YlKF1phi54V~%UyHWpzPVbbTOoKz ztZAA@j-VYnfN<=nZCh$XVYorqp&A?i+!`_%J8D4Mh2JCC)wCsj+uPLJ)H+QR5+sU| zgfUVM;HiN~aPJ+)?g%aX{$>zgO)K*L*mvme&^rBdliZ=@QvSi$O<)07Znv9!NFhq7Ag7A>7pWwDX?*7L|+3#oF|<+k&n zg>uF8D=BZ|K?`NZ;kYdS9+YI(`rV2t(8W-Jd;Lp*TE5`3E!iGBg&lQHz@xe 
zf(x_O?^1v%Q+Nb`QLFwv1SBb<1f_^l<}r$+Lp

!pvH~Bd?SzFu$TAub@!>exnPD zFDSmCO#gmk2NX_994|r%D4dj8=cf_>e#6SF^}B~Edpr>i>IWxp#mmiTuXKiov!MAfAJ zoraT5mCe(?(}-eY^HhuS?=*@vRW&suGpMO~qIvoE89~rgRq@aR%@fTNm8PmHS`o_3 z(`@GHh<~54LRA$IVO~#9Hgu_~0>F|TTu-CNTu(EwQiW?(eX!-{S9)!D6u2zud*zagn1>Zow}L47NBqFIV$T47))0 z=ekCZiSLiK8*2IJIW{igiyYr!kLb2FKo(|^qmRuy=J?BPVK*M)_K>4b=Aq*z1%d9c zrQ-;Ghi*(n`t^3m(Z{9-4Ix3I_=m*$+!jSEBqT`W5?S8N5VwVeganC(us|a5`(xp^ z9kRWen%E`!e8Cnu`ix06T=@OLLc<(DVpfPljy^U*B0St9b{LF_H8wsB#yz(O9DHN< z=e9WHs2;H~iyT!Na`efbbKD{?kwwG}5<_Mm8_z6vk$!BF5fL}Y49QY+&a|P?M?XQ1 zDv+2JvfkU4ZQYn{g(Sp8_E+P|IX0AiS|Op3l_5tT8_z9=hr}`DsOFqr6>RjMD5aM2 zh|iTS-a@K{5)H+Ig-q7OdH#Dw?ONnWw5JSC#9jRn>Zu z2_wVgg~jA0h6#s{TZVirMy-lL`DOU)1FBOCiiS7<&kL&(D!^c+NJ)+x2s>TQqY)74 zpdNO(z=(ti!2E@a8JK)v0Iw4&fMK@}7CKO)rN#zFeoJZ!U_b;;KvOm$A-04NL|zBk z1YYpyn6aC)xo@|1JykVT_ulBLZrcfh3w&dsY@;;2Vb2BvkeC(9(3?3#me;-R9W;d1 zk>W*W(Zd21R$@^;ukaQ*{QHY1PxVwSp#1xbBk%N1y6WF&V4TXS^br3(etdzq>fX z;=g|kipOb( zYKMxB6ci~aQX><;iMlC96{Ct##i-&7i))6vtb7dCy5TZlJ_ciPVI^T5@z=*{qv`w0-(*(#>vWzOo`Slcm95C@!5iVl`39KW8U||N|ahy@8CIce?A_)>?SP%jp zidAA_ubf}2E>EO<@E*?r%84O?-D@TW1%P^Tl^7X6Ilp3HKmr;x@u4whm8-a*VYPg4 zvgC@B-m{`oVwaOM%UQl_IZ^Teci}Q;d1f)eablr|8LGUTyifuN6cH73k<^o1q-9Ai zI+Ak!luaue4v5HWO11=J3Y@cuje+?Ejvv!&Fy`z>P}J9G=Jn&8u7>a zem_>zXtWY zGSi*HI8&lgi~~^k1kgGSz@_E^;7`$%6V8pAHxorxy{wIO=$rR3Ir%j zeSm1GMgw(K_r9H5wr#fzCUFeAyhQ6p1ry?cGDQ=XR z84*d6Bpo9bfM7TzB#dRTAZI;4697d8qu~He6bHh945A>0K^TTX5C%aIhCvWQL@B2j zqT2)gdjXhKF9XUOQiApd>>5QdAO}eUl=ca$1%UlV#*o8^gJ@Ak1Z;8uuRaWPSq#wk z1JFtTfL(+{?Y#lrGqeGyumh|&P+rfXAr1N87f__6%68BelADYXV$8Cr0t?~o0ihcQ zkW1Bp#qfEE2_=ZAsW=hT8)lhKx)mq|*Gz@AXGKY#r|NoPe%5Qdxi3^3R6p(fkuR_5 z>YGNDa`#_r9aA-OZCljUPdF>Z6=UR372^4MbjAejg@Srtt%uEU<(3R06T0#8%I$ht zg5!~ z@pQFBR7$EejH3wlF&=<~@_aNu0ale`p=>_YNPw|o^=wd!WDkgYgR^vtS-K#gl6F3a z%U)qi-aXm-RIgWa{{QU1eYKo#Uk&HqtH!zW!yz98fmA{%>)6XDM_C{jpI}n`j%U@O zBydptqtKPqv$8{uPHE~vCCP9M-xH~D3^SN94~7}Wq(V`WIZV#dgMf$@%i_Au3N08| z4c<$|??^^yqm|Sl0hi3JYLj6*aMsF75(;d7D(icWVAXophTHLXznvsf`s0|AKoSeK 
z1F$7+96l4p3BTAxEF|-&A-Gk$!ndIBXFM9jLXnmDf|6~}-KV$v|99a@<2|Npfz$x~ zcxCvd#D-32na1)@0=WicXzIj-H?YR8l}KR;7S7Bg+@RbDPKPfVRo%>*b67LR0;uQ^ z!hB<*#?K1#Tfl@U6heTOaGXFS5^PvN-8Y?Jz;@QO0Fk^S>jPJnL7+MLfk_%J)!TkX z&VVk|0~CuK#4uuy2#1xC-c5@ZfvCf4pl3^|-)MR1CBdOUoxVAP{g@cV$H#XbO5w1eiK+SrF_E>71gxW^BiaJW7zE^y`h|NBE#|r_wh+?g z47U+K#4Q_Vuc?GEz!Xf#{^(w^RsvIKj`;m>ayUyM6o>QWQ2ilbGJ3(~Ah`f0z{HK4 zUlUO|SY9)-@rNr-IopAxrF27V#(o^&A+POX4L0FLe)`~;4na6h->{*QE~Lw235FE! z0cd)t6W{d#5^E>CM}YSxfZ=q&@G-X018Pw_;06%F!vFx9m~{r65Du9AL_qGU7kFL72i6rO(pDxB zOZ*|su{1ClB{|DzQ2fKWqWZXeeu|^gg-D&hzM!%B`t-g2FFxbhoV8Ow+qr&T?7AOLkN-aU*8J@_&?R?}b#xMhP5mk;ZJU6}P=De6-6ppK+E zML>dX`mi|bXL^&$-fYtor)Eu3j$BSLOTs!8j8CxXSRRgu1!O9qJ<>QMt2|!oWCJhS z2+TL^y^;q|6@~mke{QM93D!Dr{z~ zF&F&52VoMgv#yOWHN@TczRlzq*9M6IsLU!r8q+39C01ez@e!53M=tA)Pu38E_Kj{0 zj{@xpL*De99!btdBQ!uq8l0dOdj1lrQ$ZL2m1y&yKuV8bR9A`jyGQLXNJI+8dyv9y z)*e{P1E`Qi=j9ORB4nJbz~TzJvBob86**pf5eKhA1!h1@7C7=DV5EiM+`63+AGDx8 z-G{Kpt;;HfLsW1$wm~a=^8-59GvTBG+=0$=1TCPYt!$0p#RY4>`6&d$K4a95;i(N3 zfbKm3x#dHp@5c9QVCCbM< z{v_~U@jmFLK%!O!o7&&@q#f^y26~UIxs|XH&Q9chs=5*2W|uaYzl@Oo*|9ongVD&4 zHs^@gw3!BZw#)P*>)8)!qn(6LRebu!$9g4V%SZlnKERqdBvEeIUwgSel~|c)=d+)x z=P2<%TdNStYIUx4Erv5CU%44yTXw!0lrTKRLUV-;fBLf>Bh+4P*oeyR z@xX>`wP*0y*bB#+PjiRfbo?8zOrVNTMwoJ&o?%D9t;P1M1|dwHZ)-dMQfd^vIcQJ@Hj;`W^=FNzqsVNxJBZ(_6Tkv^4ufCk zu#=?v`@|y_cIElN==^M;R!_|ArvT9F9Vy9C249y3T9603vbwMO{jCkyW+jtX^+zti zRwJKUupEp-07^Wj^jWlJ$7F!s1jbY5NoIyL*1f{Hxd*SnT1C5c>+fAPLri<$1hvW; zEBUS+61=MNM?Wsz`G+cGlX;)J@K;o`-12vOOW&R9%%5(MGqL?`y;UiVY)sbhgrMwg zgn*nTcGF~W4YV*OsstwtkEX)N+z;#yfTtyptyZDUn@mvjam9gQb$szE(T3rOrZQou z6{lR8UZ4l6&-k<-X{>|2WT9E!SacO!z<1hUK?`|-QYcpK!CM+|g=K;Y0|;vWZ0spz z0NYqlJywM>W~O!FeLhWTI{p*&d9v50>m1Ka?k{=DfQdC?SPq!_y?Mz?NVG6+O-fS4 zq}W@|Zbzm%W>Zk&i1}n@k}wW)q}m{RO_cdI)xH1GhvRg?Ky^j9s{Gs(rzG)y82sMr zZhjn~SU}xLWV=+d`dK0I*MVmZ*-n9paO4mE?rpC5K;r-Y#i7ew=8Ih$XJap{7_saN9X{f!Vu zA01CM9)MWQdQY(gdF<5f2gjdDuzoj(2m^wTMAhg5AH0Aw)bCgIfS{Nj^Ipq_1cy&_ zeb5hIh#+SOK-&2kAxb1zH>i%5LM)gHOcot2w?<>4Z!pmjozT6 
zY~N=cybu#Wz`+2Bfa}hvWew*)YMmA8HLm#o0@@YIHS)MkDiB#O^W&9p8L#Gtnj+!i}+=5A9G>t>jOhnSl(D+sY z+4Wr74S{GkU(uJfVy;{U`tv627&l%*`qU~w6b#n+Uzkmvjdbl@^hjurfrxB0Wh_ao ztRd>R^EYHxtE%LQS)#rCgs6FH0ai%Vbo!i#xK;fKo5GC%kU1SKOj}D8N@i({S;&OS?I0ZTZ~VsajmF zy`g}U&=4mDtv`RU{-@Ss?|CqgW_6&xy4%Yg4Eg0LP%{m!l+xa=7?CtN{B4(eLfv8G z{TTd|7iR2Y-?^GT_DQ#Q=L(i*6}-vrP_743LcJx#N(r#0NOt8VvkAogLX-@Q2arCp zmjf)Q8{Y=7NvI_5D|s09J=3|`2Z%M^ltJ|Lw)8|7aGT&R%Ak6Bjzhg%5Gx*7r6o)F zZuJhqq`Iu-qd#kM{?xF%ezu|@r6JhVQ?w6@oD;Lf;S6O$<6#Vl1}$$s&;ahT+G~8S zKu(`UNwJ6x>+_JL_HS8WaY}@VSIDdKNeEow2R1^lH*#rBkKekDpJI$*E}x{cmolJu z*A;;}Od!xJm(Cku;Lo0wwLahoz~DlU=Gl*5!3rdg^0xp5T*fzGktzc~7$85hS-KZa zN!`AwBCP}@t@Uq!!_x!?_*(&pr=mf0j)fywmJ8EmZt9dhXX1-jD3lujt&L;d=bD(i@VK~ z(Dx{n2aCQF;di5tVbO3Q1_T(5ugOK~c`4*bluy_q5=jBA-TVFr4q^$z_JYb$*LSs5$R+J zh)@nddK&|7bOV7;q~2!?DhlHQO!)zLHj%1M4mRSOlBWX5zJTwu0a9ouo`9n;fGEuS zlQq_=R3ZT+2f#X*d!4uq07LQ7uH{g73LrLqT~QtlHDEJf&^H9YK02-r9MD8$BXdKk z4X_-6qVAS4w5DvFii{6C0aWe3J|ua-@6OP5}V>zj`$Jmby8jRC07v&pU4aq$n0e z<3b!a9b@FBhhsJM<*t-WFJqQ0&1yb?wm4QxF%RHn;Lr%*T0_9uduQl1Up=j6vay$Y zlLG`H_y7u51a!DcgLty!1CGyDU$o460aKKEaDer+ECq1VOqstU{cHAA(SBABNxT~J i|BaWFpo{<*j>ecwYS`SOe8#g!u&mJgea3BsE9nv^6%HZ* literal 0 HcmV?d00001 diff --git a/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst b/kernel/tests/data/v2-classic-checkpoint-parquet.tar.zst new file mode 100644 index 0000000000000000000000000000000000000000..87bca6f59c2827beadfb8db3f58b7684e6ad99bb GIT binary patch literal 8474 zcmV+#A?4mEwJ-euSd9n)+A|47Tp-Rl>l~J%7kUYrOXzP|eHqI{6GMtx05I0QP2t0@Y_hSN>C{h;S;s(-BONA^ZtRl8#3UGlNAEdN1 z!ZbPA;SexfKoba#sL;WeD+x%s05Jv&fTn8g8GIL5U=cbB@`#qAovbg|o}>t>B8nNX!)`lY1w(ythw%k?hv#-&|1S|+1| zlo!CjiF7+<(=|?89u#6g+*ZfKr9T~uYu)z6v8nsh1>G38WoBe%W@KhepLr95 z$;1S40V@osI+oSBZICD}(4|hNvgzJx>#n<|TY*PlPR9m=uddwzgF6=Y>R28Q=Cv#? 
zxKnw!HBEu+FZkxx?FO~1PE~g;{ZSMmy|XZek&tpht`oG@G}^4i`-9=#X5-XN;}-^Q z-45O>uLZ{Js{6GtZ>Q@nWq)uD7RpIKTP@A4X)dkRjzb&`nNIphCYgHYd5e>dbMK6W zzgH+sBA&PN2eBx;ov?iN&Up9|Po^Hd6A24rIe6aA8JKjOk!SCWjYH;>iHGc+U-+Hl zVNOW#m=l(OY$4NYuFb77GGb9i9%H3kE(QX8jc@M9u_%+>W?JtMc@w31m(yz(?ke&- z?wayim#$qqZKiGdeI`oeT~1H)rNeS5n`sl)&R$M4QQC|)qj?R>rL6pUxi0-3#z25b zq#(!Fh_oTP%}Uo=KM^;dHq-iNrJJ%@2~##JP0UW{_{Tr51WU__8~KrIPjkS)k+!SO)wS))vhLnHH^gXsiOAgE8;JY0y8Zj{*bToa zt_|)N$8ML!@nU%6TDOgNycUQ{_ud;fA73MGNe;$G<4-J$ zOI@plP}6av27GI1UFz7&)P!*iM93@8IpQnN$oCf`_twXEMkei=UR!soySCYNr+aO6 zY+74cb??2m>(<&s&#z25Lpo~PmLLg36LdpfP7*!G$c!2Zx z*K9R29EeIcAHJ$>=vk^$Wmvn!#sIpYE#uEO}~J+56Fvv^T9zz7!@ptWY9n`#JZr%d1k|M>{7lEu*)Rthlz z7rJ=OY307r8dh!r!i5ufkMn39Ba4|%iy`8M9>BPcP<z>k7j! zo32QCNk*{fSUetOtZB<@U3Ohvb-TKC^IEk{CihHCDhdgONkn$(9iB(G#*U&K1UP%Kv3TCtHr;M9NGt-LOfK7| zx^$;Hw^es-6N^X^a_LTTAP=~{aT|O$aPxrd`a65)FbT<9cYAeL z-d2ISbd7gAjbWUsrazpzds!UY0#h6>=(fLDjdN*_n~!9&$J;Egy(Ldo>Fk|DfU>ZN z<7DdHeDvtu&ZLi{qubFBVo?aoM@&6>=h4yaJUZ%~M@LL&@0Lt5_0C@JcK*~|-0IZy zRo9IR!Y>c!vOyNN?t(DxWqIkFvhCSBVfke0G2N0$A187qzVh)^BFg9O93mv%fQhlu z_@1|$j~>07u6J8|Og*L^Js^x&D9<1kiRVpSck1eR>dISnY45gkb-VNozPc3{RACtC z#yIv>cR1FCSzO$vIvmX7vAQ_;J9|vMQ%F@hZ%H37FotC$QGSO-SjaJHe&ZR(Sw0eg zC=ZV@{Kh$m@=#a=7U$y82*>jqAA#>N>4zL77J@A3E!lY~Vf9Yy0i zJQjnqQ9c5ZF+B9n@vue=6XSU)HV)qz84u+hO2uJ(jA2=+c*OC8C?De)nIr`AnC0QH zC?uqqV-_a8!59`&%n>8s-%<<{i-Z(&EWJZ!S@;rV`1k^k@sD^W%5#hi28&8VF;Ny0 zi;g#d%&+msdo0e#V{rz?u`q^zyz~6dNa7m{i^bm$;tNFRonLq;?;sZC`31^B87>Zm z!Xz6=qWlI@Sw2vFp!AGf@lx}`8{*%IXsAJ{K|wVQ=S98DiCS=d8zezpQtxu|HX9^C zwGH3p@b5%OY_RwBNTDP=2|ZFfCq~3#u^1K>s&lS0q6Zu7eJYlqL_cIxu>{pFTsK1% z8|-~FDkzROqESIL>N+JSWxf<#P!XO-3NENta<|F9djMsFy^p5|)Pz4k z5iOX1_rM5G6cJAps>>F9?crrUql*ppJ{cA2>hi85WK^rwzt7NuA`3;<>g3;NOhNI4 z+ISivpm;)c&($IRea4gx_P)15d3=uDTV4J8jHfUjMxFY18DZSYtx)ZP;(P05E@KEA z?0q{_s7r}y+o9S&{JRV+C$vsz?J55*L&^!N6I5I3-(|d@qot#@jr_X|B|20(R6#@h z`_LB?S|+rDuKs-}%LJ7Ps-UTVAG(sGm7^7ukDlY#XqsZ2ll`dvg zRSa6Twg8}HO024MTC7TIYHJ7E;$Vk=hw%if+QF(gnBm`Fgpu(gG~q+s}sk!sNdCJSogX=KR~qC|okJ<*UjNDK_W 
zF+?*B`zevwki!)Wzpg9WlYbXL4K5@uBsL^35D}re$AOCY9T&g~rt34&7w>!XOD8VP zF*X~gnJA2{8Et01>9LpV#AM}HtZaeIukrN`!*5MYYnqY%k&%@@Hm+e#T$*EKWvg?5 zTeH%EqEIYo(+|}aU$JF{(`n`7#Rp2wOAU&YZ;*rp?~%eHaurK(IE{)3o`Q=B&Qk~WubWD;cw+(+{&fIcTSwp(J^6yDn~1as)SYvDI6_0Om41hwrr}lYO1zqs54yhL*4LPDhfg_}<^WwTO!A)A$^Y*y0P#I)8i zFH==2WwY{SvvS3Ym~2+UepWgQ*{n2WGcB9BOw6=4V@~)04^c0i7C4ZC58mTEUOdod zj1VkDIl)iwVZnknabyVM=>#$8np~8#=3ZBurv>{vOW{@pgUpE;>~@RX$Sx4zz=hk$(rOGBL!z13k&H>Dtx51Fc}f_Nq<&I||0uljLQN zX0cnAoQX+AUVFo)UEd8BS~7W=Aa1Z!c=>nq$_>;FlnN~Wj?!R*WrL+chJQz$*g)Ap zsd)N#)Tsum28)KozyBOj162b>+w$)}KhR*&V9})f`_B~(6b%$D>fe84n8BLCN)hJY zf1+lfW}t%j_n#x?74u38D*w(x%WN{6PAD!3MtaKDkR^G{qJ6YLrvf`vuWt3B*Wa2ki zH&9qmSWs9{SWxB_vnhthtd<;BWro9=o;G`NdA*+8eA*DNEnTvib`g zwcg61K>(9#y(34e^^+wZXsts8p4K{OB-@wP!6d0Un6X>CdQ0wG+dFUQDJ&%lfVdJR z1MU*SWuSoYD#8a8U~nDBDT`{NjNl3qAjqUZ1U4XG=|$iN2$I1QW_m!4ul^NRKGf)gQJ<~yz=j_~DZ{qmRlYE;Pbfh*3!YyNWm zT7okJ3RPg3GMPjm2NsTKG|@*p2&1@i^br**eMCg-5VY1I=&4w%OycHA4h&$yW3pVv zi%z#!RaI4YtLYF`4d9{Smn0GrQh_BHFhG_TG=N7r!vHYw0S;fJMhNl&0xYNykjp*M zp_W2y1Qwh%l%Rl;VsXR=s?ZH4=;Ferg&0|!Ob|4J7YKcz0w)0C0ue{VpoDaewPtmy zs!Lbh)~)W2xAN9qE&%aRo`uELRh125-Ihgi*EUGUs=Kz4NKs%qs09~jCEyV!p~QtGOe|Zs*S&XcO;=al zwVm!*iqiiIL>X21b-PIRVO$x>r?giA5v{ZWOIG4aOD#ssLrZ ziJFg2z3y=rM*KUCC!i>xC>L46pjc2WE|_q!!QO{9p*Y?MZ9=sR55SgxFWSKdd*2Tg zisOxbs5+4HGB1+yv%&r+^R3u|y0(0oWW|;aqW-<;g%wVyji<4~sTbzoi1sN<#@DGf4iYGg)4k|aq7^A%uVI3ywq#eB*F{RL0))-ct+StCWnNdy)@C0s}Ckt&& zW1gpkcEN+LE}iM-sG86I@A)mhw5BUlm9K1Pi(Bgf@{OPUsZ;{Ai{SaN31SaAobnWS ze-Q=6im&Xl-7l1)QSNn)rP_i~6563ly3!rmdr3;1B68X>GuAL0a5iv`@rC$1cx8Ht zkbQqB1tyBs3L3ljqb-nFLNWCf2tB5k)V0Ab5egum*k=<$yH@9c`~VB+IXr3cr@{0f zBuIDb0olb~keebgYhxGPZE)Y_Z{hoqW+S{bx z9lhk0*s-gJOAmuWB0Z$AmP%$cWLyN|Vwjx~_48s^{yq4b89QSI^gfbDLv8(Z&t!(o z-_RDD22lC!g2o5@`A4$fU0&C$I`8rOZWB{9w; z6W{Ef#e!d?&HhXSg3uo-ki@!T$9azy-i}mX|GLIRcrWWbG7!?qrKT$)+*eG>%A~wn zs#O)NbUP$IT-Q)m&=BOGQ77B(Vfr^phrzCC@mXU*fgD&%(#8vT2rb0!NTDsG<@dc& z6R(RcpQzMg7XmJ(bcGdC`nVp&Jm8xLR5#m2UdoIS0^YTlpOb^g;QcDFFtbkxHwQ*f 
z^DJ(x&>nG9Dp`>#ijfLN)hlH0zCL?S8)O(Ij2EswQX|^NV_hZW3#eyZ zZC%Xf)XSNnUG-E?WmS*sW^Fvc1SV99Mltyz%ncJLibly0-s0_8)!L zBx)c;0c1nAt1KR)C6Zgf4On8NDlfiVzK>--3(E>h`e?@6K=ctBVW!EJCKP2`WTr4 zM{~#kUF(ucLt3D)N)!@#F2~_oDkC*5K6&`U}(w^F!AkhV8D5RNnXTtqvjEn1D`cS8~^b%?oS#N zs<&fBGv4^X(v3e$hAiB+k+2XiBF8>t1QI@xx-x8NqAr9z*`lI<02m6O9sjqC0crGy zB2Bq(K>gF1|Vjl;#fAo8tMUd{*Z?i$`Su#5RM+e1jaHCFqw1- zGyp+vM#gFXqy^-dZlb88`;YkmxSO&8{nrCztyXd!92p}$AoDs0kdJ{3?bVb5uB|D` z6akD8(Huno5n!g<16VMLMsZC=FzTuE-INLzpCtlw$Gb_-EKup$Ub(bmow-hsL7=47 z7{aPYqUnrp{bkA#H%*fcG|=|4`q==V6kjEA=?_<9*A(K}s?Sh2$6mh#v zYq#x9Xf>z>Vvyv`i6jJ7Pvr?bZnfJns(~GkdZc$eSDP`9$o$Uy!Pgt!p|HCtDvXK} z7_e!Bc8R%6em29Nh7|4CSQ}ZP55^9BZ%!$*e7L{@WFNs2Qc6{q;S~gk0HBTwO^Cu! z!_RfaVNnbb+{9xIUp6Q?Ee-xEgf(nDV6VrR?qdo^!xm)HGR9R~T%azuwWZ&#G}&#A zEGz>>NI4bDMC8fsvb!|{C(CtubO8(q3$1&5NwW0rm3z)P4Kjh~lyB6m9`M;$$$!WM zOfuNuH`7o%N*a4s;8hpCeed18TFFDkQ49tP22T*QgOOmrKUm|Gjz&8at8bn*LE=== z?22K>@>C4=$}dx}4s8)vhC^4M%=O>T(^~DRHSeqMO}okvgRi?dOa%t zwOR`nPKi?s`jaczOc7U9XZ>>HD3k0u*c{7S?0_l-wze4L8)&2xc~zE&$CfdTV9U4> zfUYDJT+Ly3#R;B$TtgaPRD`1v-$sw{c*h@6A*Yqy4#c*~W*Ol|vhZLF&tF|83&Vj6 z8(N#uT+M!=fgh{F{kj{o0-vA|7Xp~CJ?NC6cb_;8xw%4IPKz~FVsLHf7z%1b@e%s(;bxG-e3U3qx7d@;bF&TVa+f<^bAWRKvzoT8FTBQsYFh| zLHUQ@?nGHM3Axk4Ue0FoV*F#s?|Re?@B^JARM~sPGZhcqL%rWyHDbgd>yVe{>qD^Y zL-HAiD9ujsAo?CfzU~8eI9bTRLxZC_U3*bHq%>rvNB7T1Gt7CSXANNxGy{qd--=5C z*J&t^2U8cwrbjNPV4NPU=vJ4Ga86jK-eEeAUiEss=`3Ol2}#b;(4TGRH$nSdf2EY~ zvX3n_+<%k;(XZ!rBmRUvG7bS#_38PjVOTbGF=KXX<1#)~zfNaxbe5jntsU4KM>Qk* zmabnjB8~x88|iCbJ~tkGP1ymv7q_{*!v71HbDcss7szzLN=g+c=3>ns10>aN~qXTQ#4R5;(kgjZRZ=Oh6upe zPeej!z-d)7F3IqkqCmnD#2Z;6Y9EAdra)}A5DzN}XnX2WaUF6lB7gmPJS5p(fP*F^ zUa>L)?oK$dB2Lm4^f1?c>Ndo&n-QU2GmXC22{o8i`uiaar8h3f1d39yw-=OxT`&_* z9opDzSP2K%K;S1*3d>)qaHWXsgf*(YSu3Fewg{c4Q`BghzY_VH1)N=&Ti5Kc%-uk< z*bB$awT$srFR=4v*PT*NrJ`Pj8CD%4mO8D_Q-nvy05lhC6@xs&Ku%3d*5NoL4o9}! 
zTY^>ht{wBNLIONc=@wR_V#Gsp*7UtjkhCKxc2S<##a3|EIXL1;_DtVRcqE-KVvcb+ zfGg@xjT=LT+w+%^3qft_IhUIp^-53x48=c&OwrQilINct(?uXsl&ZajC2n5`YcE60 zK&=rVjYonA?M=}M@RD()zFqnREiii7`X((>0OK_3_O?O=Z-|1IaCcx{Nl-%Z97F*J zm?tEz#d4~N08(BX3U zdZ8dxoJaud*eBZRiKB^qeZ^;kh~L4bT+J+WHRT{KNhtI33lkXdga*(Id{5KP zA1>t+HEi60&&XyWayFhc}#DKAmJg}+#$O(U#EI<43GBZ`o2H=I zs(vK6dnLk|NH`U<7epk)LhV#|WtTb)Jjrc87jJhBc1l`w0x(}P$`c@_NRnf8Omc(; z8CHpq;s_@vm3+FwU8d#@LFv9$C=_T|j1SE(_~da24MP@`myjnPZOg!FDYF=923?0L zBzgxR@)v;km#;;QMODU>XEi6yQr9Z;(#576Z?AW-h<)eoesH1_SXW3)Tj5EuD9tew zf|7jB)sv5T&5uZx?j&_H@d~m@QT`BEZ^ITt#Uh%ZoIfK!V`D|)4)tFt?|RFA98R+4 z4$2`%k!)1L1*!clxRc>3z3sQ$SDgwCD|#Qg2?K*TLyzxylD^ph!}|DytFcbFr=Y@2 zi2NDnK(1kK-11)irNl^CcVwsFV2G6gT&xXh4wi9Yfg#LWYeQ?80;_E4iq-Qz!U)9f zqi;luu(G ze(=~8QYZtUbhG@WAlkNBXch?gdqJwFLwJb@u?o-47@#1IcFcCe?{&W**v{Zyt-+)~ ziUUbWY{l%qC}%vGy!lSURQn;FfY5-A`9#0tbbE~{o1kW8{4{UjB(R2N*kx#NiMnA* z>A_Z`OvM(Xzp??$TBgPaoZf>2@GdR~4^V+VV9-23GZfmc=8q65jO;*;1s&jd(HQ_F z44|m~c^`l?Gz>76D|BEI!Sne8&^Yk_@TRz7 zPjvwAzV2w7wTT!$@J4=si`DIR!|YP+gArRRptoNEc<^f!-_orHU_6E)At*d$6K)*V zr|uRW?Z;dO_zVOSK-LUg8JiEF1Ocl6nAU*hn*rwX0A{c4#eAo$LEsqd0OBc1=?4Ix z#zyM_WKaG{43sgI4LCNyatL%dfC)xmLQMAl4E=++h`ahvVGt({U=AV(G}kie2;%?KsU8-BIyk@*D^ I5iDWR0sZgU5&!@I literal 0 HcmV?d00001 diff --git a/kernel/tests/v2_checkpoints.rs b/kernel/tests/v2_checkpoints.rs new file mode 100644 index 0000000000..4384ed6e0f --- /dev/null +++ b/kernel/tests/v2_checkpoints.rs @@ -0,0 +1,224 @@ +use std::sync::Arc; + +use delta_kernel::arrow::array::RecordBatch; +use delta_kernel::engine::sync::SyncEngine; + +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::{DeltaResult, Table}; + +mod common; +use common::{load_test_data, read_scan}; +use itertools::Itertools; + +fn read_v2_checkpoint_table(test_name: impl AsRef) -> DeltaResult> { + let test_dir = load_test_data("tests/data", test_name.as_ref()).unwrap(); + let test_path = test_dir.path().join(test_name.as_ref()); + + let table = 
Table::try_from_uri(test_path.to_str().expect("table path to string")).unwrap(); + let engine = Arc::new(SyncEngine::new()); + let snapshot = table.snapshot(engine.as_ref(), None)?; + let scan = snapshot.into_scan_builder().build()?; + let batches = read_scan(&scan, engine)?; + + Ok(batches) +} + +fn test_v2_checkpoint_with_table( + table_name: &str, + mut expected_table: Vec, +) -> DeltaResult<()> { + let batches = read_v2_checkpoint_table(table_name)?; + + sort_lines!(expected_table); + assert_batches_sorted_eq!(expected_table, &batches); + Ok(()) +} + +/// Helper function to convert string slice vectors to String vectors +fn to_string_vec(string_slice_vec: Vec<&str>) -> Vec { + string_slice_vec + .into_iter() + .map(|s| s.to_string()) + .collect() +} + +fn generate_sidecar_expected_data() -> Vec { + let header = vec![ + "+-----+".to_string(), + "| id |".to_string(), + "+-----+".to_string(), + ]; + + // Generate rows for different ranges + let generate_rows = |count: usize| -> Vec { + (0..count) + .map(|id| format!("| {: Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_classic_checkpoint_table() -> Vec { + to_string_vec(vec![ + "+----+", + "| id |", + "+----+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 10 |", + "| 11 |", + "| 12 |", + "| 13 |", + "| 14 |", + "| 15 |", + "| 16 |", + "| 17 |", + "| 18 |", + "| 19 |", + "+----+", + ]) +} + +// Rustfmt is disabled to maintain the readability of the expected table +#[rustfmt::skip] +fn get_without_sidecars_table() -> Vec { + to_string_vec(vec![ + "+------+", + "| id |", + "+------+", + "| 0 |", + "| 1 |", + "| 2 |", + "| 3 |", + "| 4 |", + "| 5 |", + "| 6 |", + "| 7 |", + "| 8 |", + "| 9 |", + "| 2718 
|", + "+------+", + ]) +} + +/// The test cases below are derived from delta-spark's `CheckpointSuite`. +/// +/// These tests are converted from delta-spark using the following process: +/// 1. Specific test cases of interest in `delta-spark` were modified to persist their generated tables +/// 2. These tables were compressed into `.tar.zst` archives and copied to delta-kernel-rs +/// 3. Each test loads a stored table, scans it, and asserts that the returned table state +/// matches the expected state derived from the corresponding table insertions in `delta-spark` +/// +/// The following is the ported list of `delta-spark` tests -> `delta-kernel-rs` tests: +/// +/// - `multipart v2 checkpoint` -> `v2_checkpoints_json_with_sidecars` +/// - `multipart v2 checkpoint` -> `v2_checkpoints_parquet_with_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_json_without_sidecars` +/// - `All actions in V2 manifest` -> `v2_checkpoints_parquet_without_sidecars` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_json` +/// - `V2 Checkpoint compat file equivalency to normal V2 Checkpoint` -> `v2_classic_checkpoint_parquet` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_json_with_last_checkpoint` +/// - `last checkpoint contains correct schema for v1/v2 Checkpoints` -> `v2_checkpoints_parquet_with_last_checkpoint` +#[test] +fn v2_checkpoints_json_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-sidecars", + generate_sidecar_expected_data(), + ) +} + +#[test] +fn v2_checkpoints_json_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-without-sidecars", + get_without_sidecars_table(), + ) +} + 
+#[test] +fn v2_checkpoints_parquet_without_sidecars() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-without-sidecars", + get_without_sidecars_table(), + ) +} + +#[test] +fn v2_classic_checkpoint_json() -> DeltaResult<()> { + test_v2_checkpoint_with_table("v2-classic-checkpoint-json", get_classic_checkpoint_table()) +} + +#[test] +fn v2_classic_checkpoint_parquet() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-classic-checkpoint-parquet", + get_classic_checkpoint_table(), + ) +} + +#[test] +fn v2_checkpoints_json_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-json-with-last-checkpoint", + get_simple_id_table(), + ) +} + +#[test] +fn v2_checkpoints_parquet_with_last_checkpoint() -> DeltaResult<()> { + test_v2_checkpoint_with_table( + "v2-checkpoints-parquet-with-last-checkpoint", + get_simple_id_table(), + ) +} From 725dc70daa13dc73034d950809e5bf5c61070d98 Mon Sep 17 00:00:00 2001 From: Ryan Johnson Date: Wed, 12 Mar 2025 10:56:21 -0600 Subject: [PATCH 28/38] feat: Add basic partition pruning support (#713) ## What changes are proposed in this pull request? Add basic support for partition pruning by combining two pieces of existing infra: 1. The log replay row visitor already needs to parse partition values and already filters out unwanted rows 2. The default predicate evaluator works directly with scalars Result: partition pruning gets applied during log replay, just before deduplication so we don't have to remember pruned files. WARNING: The implementation currently has a flaw, in case the history contains a table-replace that affected partition columns. For example, changing a value column into a non-nullable partition column, or an incompatible type change to a partition column. In such cases, the remove actions generated by the table-replace operation (for old files) would have the wrong type or even be entirely absent. 
While the code can handle an absent partition value, an incompatibly typed value would cause a parsing error that fails the whole query. Note that stats-based data skipping already has the same flaw, so we are not making the problem worse. We will fix the problem for both as a follow-up item, tracked by https://github.com/delta-io/delta-kernel-rs/issues/712 NOTE: While this is a convenient way to achieve partition pruning in the immediate term, Delta [checkpoints](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#checkpoints-1) can provide strongly-typed `stats_parsed` and `partitionValues_parsed` columns which would have a completely different access. * For `stats` vs. `stats_parsed`, the likely solution is simple enough because we already json-parse `stats` into a strongly-typed nested struct in order to evaluate the data skipping predicate over its record batch. We just avoid the parsing overhead if `stats_parsed` is already available. * The `partitionValues` field poses a bigger challenge, because it's a string-string map, not a JSON literal. In order to turn it into a strongly-typed nested struct, we would need a SQL expression that can extract the string values and try-cast them to the desired types. That's ugly enough we might prefer to keep completely different code paths for parsed vs. string partition values, but then there's a risk that partition pruning behavior changes depending on which path got invoked. ## How was this change tested? New unit tests, and adjusted one unit test that assumed no partition pruning. 
--- kernel/src/predicates/mod.rs | 2 - kernel/src/scan/log_replay.rs | 104 +++++++++++++++++++++----- kernel/src/scan/mod.rs | 2 +- kernel/tests/read.rs | 135 +++++++++++++++++++++++++++++++++- 4 files changed, 219 insertions(+), 24 deletions(-) diff --git a/kernel/src/predicates/mod.rs b/kernel/src/predicates/mod.rs index e47da293f8..c2d870445f 100644 --- a/kernel/src/predicates/mod.rs +++ b/kernel/src/predicates/mod.rs @@ -534,8 +534,6 @@ impl ResolveColumnAsScalar for EmptyColumnResolver { } } -// In testing, it is convenient to just build a hashmap of scalar values. -#[cfg(test)] impl ResolveColumnAsScalar for std::collections::HashMap { fn resolve_column(&self, col: &ColumnName) -> Option { self.get(col).cloned() diff --git a/kernel/src/scan/log_replay.rs b/kernel/src/scan/log_replay.rs index cebce5b6c4..0e26b610f7 100644 --- a/kernel/src/scan/log_replay.rs +++ b/kernel/src/scan/log_replay.rs @@ -10,7 +10,8 @@ use super::{ScanData, Transform}; use crate::actions::get_log_add_schema; use crate::engine_data::{GetData, RowVisitor, TypedGetData as _}; use crate::expressions::{column_expr, column_name, ColumnName, Expression, ExpressionRef}; -use crate::scan::{DeletionVectorDescriptor, TransformExpr}; +use crate::predicates::{DefaultPredicateEvaluator, PredicateEvaluator as _}; +use crate::scan::{DeletionVectorDescriptor, Scalar, TransformExpr}; use crate::schema::{ColumnNamesAndTypes, DataType, MapType, SchemaRef, StructField, StructType}; use crate::utils::require; use crate::{DeltaResult, Engine, EngineData, Error, ExpressionEvaluator}; @@ -30,7 +31,8 @@ impl FileActionKey { } struct LogReplayScanner { - filter: Option, + partition_filter: Option, + data_skipping_filter: Option, /// A set of (data file path, dv_unique_id) pairs that have been seen thus /// far in the log. 
This is used to filter out files with Remove actions as @@ -47,6 +49,7 @@ struct AddRemoveDedupVisitor<'seen> { selection_vector: Vec, logical_schema: SchemaRef, transform: Option>, + partition_filter: Option, row_transform_exprs: Vec>, is_log_batch: bool, } @@ -82,29 +85,54 @@ impl AddRemoveDedupVisitor<'_> { } } + fn parse_partition_value( + &self, + field_idx: usize, + partition_values: &HashMap, + ) -> DeltaResult<(usize, (String, Scalar))> { + let field = self.logical_schema.fields.get_index(field_idx); + let Some((_, field)) = field else { + return Err(Error::InternalError(format!( + "out of bounds partition column field index {field_idx}" + ))); + }; + let name = field.physical_name(); + let partition_value = + super::parse_partition_value(partition_values.get(name), field.data_type())?; + Ok((field_idx, (name.to_string(), partition_value))) + } + + fn parse_partition_values( + &self, + transform: &Transform, + partition_values: &HashMap, + ) -> DeltaResult> { + transform + .iter() + .filter_map(|transform_expr| match transform_expr { + TransformExpr::Partition(field_idx) => { + Some(self.parse_partition_value(*field_idx, partition_values)) + } + TransformExpr::Static(_) => None, + }) + .try_collect() + } + /// Compute an expression that will transform from physical to logical for a given Add file action - fn get_transform_expr<'a>( + fn get_transform_expr( &self, - i: usize, transform: &Transform, - getters: &[&'a dyn GetData<'a>], + mut partition_values: HashMap, ) -> DeltaResult { - let partition_values: HashMap<_, _> = getters[1].get(i, "add.partitionValues")?; let transforms = transform .iter() .map(|transform_expr| match transform_expr { TransformExpr::Partition(field_idx) => { - let field = self.logical_schema.fields.get_index(*field_idx); - let Some((_, field)) = field else { - return Err(Error::Generic( - format!("logical schema did not contain expected field at {field_idx}, can't transform data") - )); + let Some((_, partition_value)) = 
partition_values.remove(field_idx) else { + return Err(Error::InternalError(format!( + "missing partition value for field index {field_idx}" + ))); }; - let name = field.physical_name(); - let partition_value = super::parse_partition_value( - partition_values.get(name), - field.data_type(), - )?; Ok(partition_value.into()) } TransformExpr::Static(field_expr) => Ok(field_expr.clone()), @@ -113,6 +141,24 @@ impl AddRemoveDedupVisitor<'_> { Ok(Arc::new(Expression::Struct(transforms))) } + fn is_file_partition_pruned( + &self, + partition_values: &HashMap, + ) -> bool { + if partition_values.is_empty() { + return false; + } + let Some(partition_filter) = &self.partition_filter else { + return false; + }; + let partition_values: HashMap<_, _> = partition_values + .values() + .map(|(k, v)| (ColumnName::new([k]), v.clone())) + .collect(); + let evaluator = DefaultPredicateEvaluator::from(partition_values); + evaluator.eval_sql_where(partition_filter) == Some(false) + } + /// True if this row contains an Add action that should survive log replay. Skip it if the row /// is not an Add action, or the file has already been seen previously. fn is_valid_add<'a>(&mut self, i: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult { @@ -138,6 +184,24 @@ impl AddRemoveDedupVisitor<'_> { None => None, }; + // Apply partition pruning (to adds only) before deduplication, so that we don't waste memory + // tracking pruned files. Removes don't get pruned and we'll still have to track them. + // + // WARNING: It's not safe to partition-prune removes (just like it's not safe to data skip + // removes), because they are needed to suppress earlier incompatible adds we might + // encounter if the table's schema was replaced after the most recent checkpoint. 
+ let partition_values = match &self.transform { + Some(transform) if is_add => { + let partition_values = getters[1].get(i, "add.partitionValues")?; + let partition_values = self.parse_partition_values(transform, &partition_values)?; + if self.is_file_partition_pruned(&partition_values) { + return Ok(false); + } + partition_values + } + _ => Default::default(), + }; + // Check both adds and removes (skipping already-seen), but only transform and return adds let file_key = FileActionKey::new(path, dv_unique_id); if self.check_and_record_seen(file_key) || !is_add { @@ -146,7 +210,7 @@ impl AddRemoveDedupVisitor<'_> { let transform = self .transform .as_ref() - .map(|transform| self.get_transform_expr(i, transform, getters)) + .map(|transform| self.get_transform_expr(transform, partition_values)) .transpose()?; if transform.is_some() { // fill in any needed `None`s for previous rows @@ -250,7 +314,8 @@ impl LogReplayScanner { /// Create a new [`LogReplayScanner`] instance fn new(engine: &dyn Engine, physical_predicate: Option<(ExpressionRef, SchemaRef)>) -> Self { Self { - filter: DataSkippingFilter::new(engine, physical_predicate), + partition_filter: physical_predicate.as_ref().map(|(e, _)| e.clone()), + data_skipping_filter: DataSkippingFilter::new(engine, physical_predicate), seen: Default::default(), } } @@ -265,7 +330,7 @@ impl LogReplayScanner { ) -> DeltaResult { // Apply data skipping to get back a selection vector for actions that passed skipping. We // will update the vector below as log replay identifies duplicates that should be ignored. 
- let selection_vector = match &self.filter { + let selection_vector = match &self.data_skipping_filter { Some(filter) => filter.apply(actions)?, None => vec![true; actions.len()], }; @@ -276,6 +341,7 @@ impl LogReplayScanner { selection_vector, logical_schema, transform, + partition_filter: self.partition_filter.clone(), row_transform_exprs: Vec::new(), is_log_batch, }; diff --git a/kernel/src/scan/mod.rs b/kernel/src/scan/mod.rs index 13a15ffb7e..ccdff3d663 100644 --- a/kernel/src/scan/mod.rs +++ b/kernel/src/scan/mod.rs @@ -406,7 +406,7 @@ impl Scan { // for other transforms as we support them) let static_transform = (self.have_partition_cols || self.snapshot.column_mapping_mode() != ColumnMappingMode::None) - .then_some(Arc::new(Scan::get_static_transform(&self.all_fields))); + .then(|| Arc::new(Scan::get_static_transform(&self.all_fields))); let physical_predicate = match self.physical_predicate.clone() { PhysicalPredicate::StaticSkipAll => return Ok(None.into_iter().flatten()), PhysicalPredicate::Some(predicate, schema) => Some((predicate, schema)), diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index b5b0849d35..6bd115d1e2 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -567,6 +567,26 @@ fn table_for_numbers(nums: Vec) -> Vec { res } +// get the basic_partitioned table for a set of expected letters +fn table_for_letters(letters: &[char]) -> Vec { + let mut res: Vec = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + let rows = vec![(1, 'a'), (2, 'b'), (3, 'c'), (4, 'a'), (5, 'e')]; + for (num, letter) in rows { + if letters.contains(&letter) { + res.push(format!("| {letter} | {num} |")); + } + } + res.push("+--------+--------+".to_string()); + res +} + #[test] fn predicate_on_number() -> Result<(), Box> { let cases = vec![ @@ -604,6 +624,118 @@ fn predicate_on_number() -> Result<(), Box> { Ok(()) } +#[test] +fn predicate_on_letter() -> Result<(), 
Box> { + // Test basic column pruning. Note that the actual expression machinery is already well-tested, + // so we're just testing wiring here. + let null_row_table: Vec = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + (column_expr!("letter").is_null(), null_row_table), + ( + column_expr!("letter").is_not_null(), + table_for_letters(&['a', 'b', 'c', 'e']), + ), + ( + column_expr!("letter").lt("c"), + table_for_letters(&['a', 'b']), + ), + ( + column_expr!("letter").le("c"), + table_for_letters(&['a', 'b', 'c']), + ), + (column_expr!("letter").gt("c"), table_for_letters(&['e'])), + ( + column_expr!("letter").ge("c"), + table_for_letters(&['c', 'e']), + ), + (column_expr!("letter").eq("c"), table_for_letters(&['c'])), + ( + column_expr!("letter").ne("c"), + table_for_letters(&['a', 'b', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + +#[test] +fn predicate_on_letter_and_number() -> Result<(), Box> { + // Partition skipping and file skipping are currently implemented separately. Mixing them in an + // AND clause will evaulate each separately, but mixing them in an OR clause disables both. 
+ let full_table: Vec = vec![ + "+--------+--------+", + "| letter | number |", + "+--------+--------+", + "| | 6 |", + "| a | 1 |", + "| a | 4 |", + "| b | 2 |", + "| c | 3 |", + "| e | 5 |", + "+--------+--------+", + ] + .into_iter() + .map(String::from) + .collect(); + + let cases = vec![ + ( + Expression::or( + // No pruning power + column_expr!("letter").gt("a"), + column_expr!("number").gt(3i64), + ), + full_table, + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + column_expr!("number").gt(3i64), // letters a, e + ), + table_for_letters(&['e']), + ), + ( + Expression::and( + column_expr!("letter").gt("a"), // numbers 2, 3, 5 + Expression::or( + // No pruning power + column_expr!("letter").eq("c"), + column_expr!("number").eq(3i64), + ), + ), + table_for_letters(&['b', 'c', 'e']), + ), + ]; + + for (expr, expected) in cases { + read_table_data( + "./tests/data/basic_partitioned", + Some(&["letter", "number"]), + Some(expr), + expected, + )?; + } + Ok(()) +} + #[test] fn predicate_on_number_not() -> Result<(), Box> { let cases = vec![ @@ -950,8 +1082,7 @@ async fn predicate_on_non_nullable_partition_column() -> Result<(), Box Date: Thu, 13 Mar 2025 10:09:58 -0700 Subject: [PATCH 29/38] feat: add `DeletionVectors` to supported writer features (#735) ## What changes are proposed in this pull request? Add `DeletionVectors` to supported writer features. We trivially support DVs since we never write DVs. Note as we implement DML in the future we need to ensure it correctly handles DVs ## How was this change tested? 
modified UT --- kernel/src/actions/mod.rs | 9 ++++++--- kernel/src/table_features/mod.rs | 3 +-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 9a216f088b..1d2d83c678 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -529,7 +529,7 @@ pub struct SetTransaction { #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) struct Sidecar { /// A path to a sidecar file that can be either: - /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. + /// - A relative path (just the file name) within the `_delta_log/_sidecars` directory. /// - An absolute path /// The path is a URI as specified by [RFC 2396 URI Generic Syntax], which needs to be decoded /// to get the file path. @@ -892,7 +892,10 @@ mod tests { 3, 7, Some::>(vec![]), - Some(vec![WriterFeatures::AppendOnly]), + Some(vec![ + WriterFeatures::AppendOnly, + WriterFeatures::DeletionVectors, + ]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_ok()); @@ -901,7 +904,7 @@ mod tests { 3, 7, Some([ReaderFeatures::DeletionVectors]), - Some([WriterFeatures::DeletionVectors]), + Some([WriterFeatures::RowTracking]), ) .unwrap(); assert!(protocol.ensure_write_supported().is_err()); diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index 5074807c92..a7002e6211 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -136,9 +136,8 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = ]) }); -// currently the only writer feature supported is `AppendOnly` pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([WriterFeatures::AppendOnly])); + LazyLock::new(|| HashSet::from([WriterFeatures::AppendOnly, WriterFeatures::DeletionVectors])); #[cfg(test)] mod tests { From 51095d464cd97787c2f9cf08e6beee0c236dd0ed Mon Sep 17 00:00:00 2001 From: Zach Schuermann Date: Mon, 17 
Mar 2025 16:23:36 -0700 Subject: [PATCH 30/38] feat: add writer version 2/invariant table feature support (#734) ## What changes are proposed in this pull request? Support writer version 2 and `Invariant` table (writer) feature. Note that we don't _actually_ support invariants, rather we enable writing to tables **without invariants** with version=2 or Invariant feature enabled. ### This PR affects the following public APIs Enable writes to version=2/Invariant enabled. ## How was this change tested? new UTs resolves https://github.com/delta-io/delta-kernel-rs/issues/706 --- kernel/src/actions/mod.rs | 9 ++- kernel/src/schema/mod.rs | 120 ++++++++++++++++++++++++++++++ kernel/src/table_configuration.rs | 41 ++++++++-- kernel/src/table_features/mod.rs | 11 ++- kernel/src/transaction.rs | 4 +- 5 files changed, 172 insertions(+), 13 deletions(-) diff --git a/kernel/src/actions/mod.rs b/kernel/src/actions/mod.rs index 1d2d83c678..7fed157714 100644 --- a/kernel/src/actions/mod.rs +++ b/kernel/src/actions/mod.rs @@ -288,11 +288,11 @@ impl Protocol { )) } None => { - // no features, we currently only support version 1 in this case + // no features, we currently only support version 1 or 2 in this case require!( - self.min_writer_version == 1, + self.min_writer_version == 1 || self.min_writer_version == 2, Error::unsupported( - "Currently delta-kernel-rs can only write to tables with protocol.minWriterVersion = 1 or 7" + "Currently delta-kernel-rs can only write to tables with protocol.minWriterVersion = 1, 2, or 7" ) ); Ok(()) @@ -325,6 +325,8 @@ where .iter() .map(|s| T::from_str(s).map_err(|_| error(vec![s.to_string()], "Unknown"))) .collect::>()?; + + // check that parsed features are a subset of supported features parsed_features .is_subset(supported_features) .then_some(()) @@ -895,6 +897,7 @@ mod tests { Some(vec![ WriterFeatures::AppendOnly, WriterFeatures::DeletionVectors, + WriterFeatures::Invariants, ]), ) .unwrap(); diff --git a/kernel/src/schema/mod.rs 
b/kernel/src/schema/mod.rs index 11e81817cc..3b9049e541 100644 --- a/kernel/src/schema/mod.rs +++ b/kernel/src/schema/mod.rs @@ -226,6 +226,11 @@ impl StructField { .unwrap() .into_owned() } + + fn has_invariants(&self) -> bool { + self.metadata + .contains_key(ColumnMetadataKey::Invariants.as_ref()) + } } /// A struct is used to represent both the top-level schema of the table @@ -305,6 +310,34 @@ impl StructType { } } +#[derive(Debug, Default)] +pub(crate) struct InvariantChecker { + has_invariants: bool, +} + +impl<'a> SchemaTransform<'a> for InvariantChecker { + fn transform_struct_field(&mut self, field: &'a StructField) -> Option> { + if field.has_invariants() { + self.has_invariants = true; + } else if !self.has_invariants { + let _ = self.recurse_into_struct_field(field); + } + Some(Cow::Borrowed(field)) + } +} + +impl InvariantChecker { + /// Checks if any column in the schema (including nested columns) has invariants defined. + /// + /// This traverses the entire schema to check for the presence of the "delta.invariants" + /// metadata key. 
+ pub(crate) fn has_invariants(schema: &Schema) -> bool { + let mut checker = InvariantChecker::default(); + let _ = checker.transform_struct(schema); + checker.has_invariants + } +} + /// Helper for RowVisitor implementations #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] #[derive(Clone, Default)] @@ -1208,4 +1241,91 @@ mod tests { "[\"an\",\"array\"]" ); } + + #[test] + fn test_has_invariants() { + // Schema with no invariants + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + ]); + assert!(!InvariantChecker::has_invariants(&schema)); + + // Schema with top-level invariant + let mut field = StructField::nullable("c", DataType::STRING); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("c > 0".to_string()), + ); + + let schema = StructType::new([StructField::nullable("a", DataType::STRING), field]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a struct + let nested_field = StructField::nullable( + "nested_c", + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + nested_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in an array of structs + let array_field = StructField::nullable( + "array_field", + ArrayType::new( + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = 
StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + array_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + + // Schema with nested invariant in a map value that's a struct + let map_field = StructField::nullable( + "map_field", + MapType::new( + DataType::STRING, + DataType::struct_type([{ + let mut field = StructField::nullable("d", DataType::INTEGER); + field.metadata.insert( + ColumnMetadataKey::Invariants.as_ref().to_string(), + MetadataValue::String("d > 0".to_string()), + ); + field + }]), + true, + ), + ); + + let schema = StructType::new([ + StructField::nullable("a", DataType::STRING), + StructField::nullable("b", DataType::INTEGER), + map_field, + ]); + assert!(InvariantChecker::has_invariants(&schema)); + } } diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index dc065c8f8f..aa1280a4db 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -14,13 +14,13 @@ use std::sync::{Arc, LazyLock}; use url::Url; use crate::actions::{ensure_supported_features, Metadata, Protocol}; -use crate::schema::{Schema, SchemaRef}; +use crate::schema::{InvariantChecker, Schema, SchemaRef}; use crate::table_features::{ column_mapping_mode, validate_schema_column_mapping, ColumnMappingMode, ReaderFeatures, WriterFeatures, }; use crate::table_properties::TableProperties; -use crate::{DeltaResult, Version}; +use crate::{DeltaResult, Error, Version}; /// Holds all the configuration for a table at a specific version. This includes the supported /// reader and writer features, table properties, schema, version, and table root. This can be used @@ -88,49 +88,66 @@ impl TableConfiguration { version, }) } + /// The [`Metadata`] for this table at this version. 
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn metadata(&self) -> &Metadata { &self.metadata } + /// The [`Protocol`] of this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn protocol(&self) -> &Protocol { &self.protocol } + /// The [`Schema`] of for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn schema(&self) -> &Schema { self.schema.as_ref() } + /// The [`TableProperties`] of this table at this version. - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_properties(&self) -> &TableProperties { &self.table_properties } + /// The [`ColumnMappingMode`] for this table at this version. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn column_mapping_mode(&self) -> ColumnMappingMode { self.column_mapping_mode } + /// The [`Url`] of the table this [`TableConfiguration`] belongs to #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn table_root(&self) -> &Url { &self.table_root } + /// The [`Version`] which this [`TableConfiguration`] belongs to. #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn version(&self) -> Version { self.version } + /// Returns `true` if the kernel supports writing to this table. This checks that the /// protocol's writer features are all supported. 
- #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] - pub(crate) fn is_write_supported(&self) -> bool { - self.protocol.ensure_write_supported().is_ok() + pub(crate) fn ensure_write_supported(&self) -> DeltaResult<()> { + self.protocol.ensure_write_supported()?; + + // for now we don't allow invariants so although we support writer version 2 and the + // ColumnInvariant TableFeature we _must_ check here that they are not actually in use + if self.is_invariants_supported() && InvariantChecker::has_invariants(self.schema()) { + return Err(Error::unsupported( + "Column invariants are not yet supported", + )); + } + + Ok(()) } + /// Returns `true` if kernel supports reading Change Data Feed on this table. /// See the documentation of [`TableChanges`] for more details. /// @@ -159,12 +176,12 @@ impl TableConfiguration { ); protocol_supported && cdf_enabled && column_mapping_disabled } + /// Returns `true` if deletion vectors is supported on this table. To support deletion vectors, /// a table must support reader version 3, writer version 7, and the deletionVectors feature in /// both the protocol's readerFeatures and writerFeatures. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn is_deletion_vector_supported(&self) -> bool { let read_supported = self @@ -183,7 +200,6 @@ impl TableConfiguration { /// table property is set to `true`. /// /// See: - #[allow(unused)] #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] pub(crate) fn is_deletion_vector_enabled(&self) -> bool { self.is_deletion_vector_supported() @@ -209,6 +225,15 @@ impl TableConfiguration { pub(crate) fn is_append_only_enabled(&self) -> bool { self.is_append_only_supported() && self.table_properties.append_only.unwrap_or(false) } + + /// Returns `true` if the table supports the column invariant table feature. 
+ pub(crate) fn is_invariants_supported(&self) -> bool { + let protocol = &self.protocol; + match protocol.min_writer_version() { + 7 if protocol.has_writer_feature(&WriterFeatures::Invariants) => true, + version => (2..=6).contains(&version), + } + } } #[cfg(test)] diff --git a/kernel/src/table_features/mod.rs b/kernel/src/table_features/mod.rs index a7002e6211..0ddb227368 100644 --- a/kernel/src/table_features/mod.rs +++ b/kernel/src/table_features/mod.rs @@ -137,7 +137,16 @@ pub(crate) static SUPPORTED_READER_FEATURES: LazyLock> = }); pub(crate) static SUPPORTED_WRITER_FEATURES: LazyLock> = - LazyLock::new(|| HashSet::from([WriterFeatures::AppendOnly, WriterFeatures::DeletionVectors])); + // note: we 'support' Invariants, but only insofar as we check that they are not present. + // we support writing to tables that have Invariants enabled but not used. similarly, we only + // support DeletionVectors in that we never write them (no DML). + LazyLock::new(|| { + HashSet::from([ + WriterFeatures::AppendOnly, + WriterFeatures::DeletionVectors, + WriterFeatures::Invariants, + ]) + }); #[cfg(test)] mod tests { diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs index 4905668a46..5124729c1f 100644 --- a/kernel/src/transaction.rs +++ b/kernel/src/transaction.rs @@ -78,7 +78,9 @@ impl Transaction { let read_snapshot = snapshot.into(); // important! before a read/write to the table we must check it is supported - read_snapshot.protocol().ensure_write_supported()?; + read_snapshot + .table_configuration() + .ensure_write_supported()?; Ok(Transaction { read_snapshot, From 43b346c6bbe646444f6b7df87e19cd4122e47623 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Wed, 19 Mar 2025 11:07:14 -0400 Subject: [PATCH 31/38] fix!: change metadata values for column metadata to i64 (#733) ## What changes are proposed in this pull request? 
`MetadataValue::Number(i32)` was the previous values for metadata values, but identity columns are only longs, so updated MetadataValue::Number to be `MetadataValue::Number(i64)` instead. ## How was this change tested? I ran the tests, this doesn't change any existing functionality only the type. --------- Co-authored-by: Robert Pack <42610831+roeap@users.noreply.github.com> --- kernel/src/schema/mod.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/kernel/src/schema/mod.rs b/kernel/src/schema/mod.rs index 3b9049e541..c9a3b9ee79 100644 --- a/kernel/src/schema/mod.rs +++ b/kernel/src/schema/mod.rs @@ -22,7 +22,7 @@ pub type SchemaRef = Arc; #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Eq)] #[serde(untagged)] pub enum MetadataValue { - Number(i32), + Number(i64), String(String), Boolean(bool), // The [PROTOCOL](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#struct-field) states @@ -32,8 +32,8 @@ pub enum MetadataValue { Other(serde_json::Value), } -impl std::fmt::Display for MetadataValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl Display for MetadataValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { MetadataValue::Number(n) => write!(f, "{n}"), MetadataValue::String(s) => write!(f, "{s}"), @@ -61,8 +61,8 @@ impl From<&str> for MetadataValue { } } -impl From for MetadataValue { - fn from(value: i32) -> Self { +impl From for MetadataValue { + fn from(value: i64) -> Self { Self::Number(value) } } @@ -1072,16 +1072,22 @@ mod tests { "nullable": true, "metadata": { "delta.columnMapping.id": 4, - "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" + "delta.columnMapping.physicalName": "col-5f422f40-de70-45b2-88ab-1d5c90e94db1", + "delta.identity.start": 2147483648 } } "#; + let field: StructField = serde_json::from_str(data).unwrap(); let col_id = field .get_config_value(&ColumnMetadataKey::ColumnMappingId) .unwrap(); 
+ let id_start = field + .get_config_value(&ColumnMetadataKey::IdentityStart) + .unwrap(); assert!(matches!(col_id, MetadataValue::Number(num) if *num == 4)); + assert!(matches!(id_start, MetadataValue::Number(num) if *num == 2147483648i64)); assert_eq!( field.physical_name(), "col-5f422f40-de70-45b2-88ab-1d5c90e94db1" From ed34c42cf993f01fb5a7cf2048a5589a8c45f272 Mon Sep 17 00:00:00 2001 From: Robert Pack <42610831+roeap@users.noreply.github.com> Date: Wed, 19 Mar 2025 10:31:44 -0700 Subject: [PATCH 32/38] ci: use maintained action to setup rust toolchain (#585) ## What changes are proposed in this pull request? The `actions-rs/toolchain` action is deprecated in favor of `actions-rust-lang/setup-rust-toolchain`. This PR updates the usages of the respective actions in the github workflows. The new action already includes an integration with the rust-cache action, so no need to set that up separately anymore. This also sets up a dependabot configuration for `cargo` and `github-actions` which we may or may not choose to keep. ## How was this change tested? no code changes.
--------- Co-authored-by: Zach Schuermann --- .github/workflows/build.yml | 66 ++++++---------------- .github/workflows/run_integration_test.yml | 12 +--- .github/workflows/semver-checks.yml | 7 +-- kernel/src/table_configuration.rs | 2 + 4 files changed, 24 insertions(+), 63 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index a8a24dd074..70deb53cc8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,27 +10,20 @@ jobs: format: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with rustfmt + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + components: rustfmt - name: format run: cargo fmt -- --check + msrv: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-msrv shell: bash run: | @@ -46,11 +39,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install minimal stable and cargo msrv - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - uses: Swatinem/rust-cache@v2 - name: Install cargo-msrv shell: bash @@ -74,16 +63,12 @@ jobs: env: RUSTDOCFLAGS: -D warnings steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: build docs run: cargo doc + build: 
runs-on: ${{ matrix.os }} strategy: @@ -93,16 +78,11 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 - - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - name: Install minimal stable with clippy + uses: actions-rust-lang/setup-rust-toolchain@v1 with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 - - name: check kernel builds with no-default-features - run: cargo build -p delta_kernel --no-default-features + components: clippy - name: build and lint with clippy run: cargo clippy --benches --tests --all-features -- -D warnings - name: lint without default features @@ -120,14 +100,9 @@ jobs: - ubuntu-latest - windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install minimal stable with clippy and rustfmt - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: test run: cargo test --workspace --verbose --all-features -- --skip read_table_version_hdfs @@ -220,14 +195,9 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install rust - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - - uses: Swatinem/rust-cache@v2 - name: Generate code coverage run: cargo llvm-cov --all-features --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs - name: Upload coverage to Codecov diff --git a/.github/workflows/run_integration_test.yml b/.github/workflows/run_integration_test.yml index 1ff681cf6a..73ffd599c8 100644 --- a/.github/workflows/run_integration_test.yml +++ b/.github/workflows/run_integration_test.yml @@ -18,17 +18,11 @@ jobs: - name: Skip job for pull requests on 
Windows if: ${{ matrix.skip }} run: echo "Skipping job for pull requests on Windows." - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 if: ${{ !matrix.skip }} - - name: Install minimal stable rust - if: ${{ !matrix.skip }} - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + - name: Setup rust toolchain if: ${{ !matrix.skip }} + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Run integration tests if: ${{ !matrix.skip }} shell: bash diff --git a/.github/workflows/semver-checks.yml b/.github/workflows/semver-checks.yml index 3f03744521..7bd39b167d 100644 --- a/.github/workflows/semver-checks.yml +++ b/.github/workflows/semver-checks.yml @@ -25,12 +25,7 @@ jobs: fetch-depth: 0 ref: ${{ github.event.pull_request.head.sha }} - name: Install minimal stable - uses: actions-rs/toolchain@v1 - with: - profile: default - toolchain: stable - override: true - - uses: Swatinem/rust-cache@v2 + uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install cargo-semver-checks shell: bash run: | diff --git a/kernel/src/table_configuration.rs b/kernel/src/table_configuration.rs index aa1280a4db..133a9b3167 100644 --- a/kernel/src/table_configuration.rs +++ b/kernel/src/table_configuration.rs @@ -183,6 +183,7 @@ impl TableConfiguration { /// /// See: #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_supported(&self) -> bool { let read_supported = self .protocol() @@ -201,6 +202,7 @@ impl TableConfiguration { /// /// See: #[cfg_attr(feature = "developer-visibility", visibility::make(pub))] + #[allow(unused)] // needed to compile w/o default features pub(crate) fn is_deletion_vector_enabled(&self) -> bool { self.is_deletion_vector_supported() && self From 6e8e08c663fb6beac09a9010d5afd391ee51510d Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Wed, 19 Mar 2025 20:00:09 +0100 
Subject: [PATCH 33/38] fix: pr feedback --- kernel/src/engine/default/filesystem.rs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 435fb669d9..110788c439 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -44,16 +44,18 @@ impl FileSystemClient for ObjectStoreFileSystemClient { &self, path: &Url, ) -> DeltaResult>>> { - let url = path.clone(); + // The offset is used for list-after; the prefix is used to restrict the listing to a specific directory. + // Unfortunately, `Path`` provides no easy way to check whether a name is directory-like, + // because it strips trailing /, so we're reduced to manually checking the original URL. let offset = Path::from_url_path(path.path())?; - let prefix = if url.path().ends_with('/') { + let prefix = if path.path().ends_with('/') { offset.clone() } else { - let parts = offset.parts().collect_vec(); - if parts.pop().is_empty() { + let mut parts = offset.parts().collect_vec(); + if parts.pop().is_none() { return Err(Error::generic(format!( "Offset path must not be a root directory. 
Got: '{}'", - url.as_str() + path.as_str() ))); } Path::from_iter(parts) @@ -63,7 +65,7 @@ impl FileSystemClient for ObjectStoreFileSystemClient { // This channel will become the iterator let (sender, receiver) = std::sync::mpsc::sync_channel(4_000); - + let url = path.clone(); self.task_executor.spawn(async move { let mut stream = store.list_with_offset(Some(&prefix), &offset); From 4138dd479af0027a2196b7ff375fe7b45953eed2 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Wed, 19 Mar 2025 20:08:31 +0100 Subject: [PATCH 34/38] fix: new default engine callsites --- kernel/src/log_segment/tests.rs | 48 ++++++--------------------------- kernel/tests/read.rs | 2 -- 2 files changed, 8 insertions(+), 42 deletions(-) diff --git a/kernel/src/log_segment/tests.rs b/kernel/src/log_segment/tests.rs index 4fdbd9feee..d00ad235ca 100644 --- a/kernel/src/log_segment/tests.rs +++ b/kernel/src/log_segment/tests.rs @@ -853,11 +853,7 @@ fn test_checkpoint_batch_with_no_sidecars_returns_none() -> DeltaResult<()> { #[test] fn test_checkpoint_batch_with_sidecars_returns_sidecar_batches() -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; add_sidecar_to_store( @@ -897,11 +893,7 @@ fn test_checkpoint_batch_with_sidecars_returns_sidecar_batches() -> DeltaResult< #[test] fn test_checkpoint_batch_with_sidecar_files_that_do_not_exist() -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); let checkpoint_batch = sidecar_batch_with_given_paths( 
vec!["sidecarfile1.parquet", "sidecarfile2.parquet"], @@ -928,11 +920,7 @@ fn test_checkpoint_batch_with_sidecar_files_that_do_not_exist() -> DeltaResult<( #[test] fn test_reading_sidecar_files_with_predicate() -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); let read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME, SIDECAR_NAME])?; let checkpoint_batch = @@ -1018,11 +1006,7 @@ fn test_create_checkpoint_stream_errors_when_schema_has_add_but_no_sidecar_actio fn test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_no_file_actions( ) -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); add_checkpoint_to_store( &store, // Create a checkpoint batch with sidecar actions to verify that the sidecar actions are not read. @@ -1061,11 +1045,7 @@ fn test_create_checkpoint_stream_returns_checkpoint_batches_as_is_if_schema_has_ fn test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_multi_part( ) -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); // Multi-part checkpoints should never contain sidecar actions. 
// This test intentionally includes batches with sidecar actions in multi-part checkpoints @@ -1125,11 +1105,7 @@ fn test_create_checkpoint_stream_returns_checkpoint_batches_if_checkpoint_is_mul fn test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars() -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); add_checkpoint_to_store( &store, @@ -1164,11 +1140,7 @@ fn test_create_checkpoint_stream_reads_parquet_checkpoint_batch_without_sidecars #[test] fn test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars() -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); write_json_to_store( &store, @@ -1218,11 +1190,7 @@ fn test_create_checkpoint_stream_reads_json_checkpoint_batch_without_sidecars() fn test_create_checkpoint_stream_reads_checkpoint_file_and_returns_sidecar_batches( ) -> DeltaResult<()> { let (store, log_root) = new_in_memory_store(); - let engine = DefaultEngine::new( - store.clone(), - Path::from("/"), - Arc::new(TokioBackgroundExecutor::new()), - ); + let engine = DefaultEngine::new(store.clone(), Arc::new(TokioBackgroundExecutor::new())); add_checkpoint_to_store( &store, diff --git a/kernel/tests/read.rs b/kernel/tests/read.rs index 6bd115d1e2..db9edbc68e 100644 --- a/kernel/tests/read.rs +++ b/kernel/tests/read.rs @@ -1062,7 +1062,6 @@ async fn predicate_on_non_nullable_partition_column() -> Result<(), Box Result<(), Box Date: Wed, 19 Mar 2025 20:10:52 +0100 Subject: [PATCH 35/38] fix: remove unused import --- ffi/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/ffi/src/lib.rs b/ffi/src/lib.rs index a7725db29a..af8f15edda 100644 --- a/ffi/src/lib.rs +++ b/ffi/src/lib.rs @@ -765,7 +765,7 @@ impl Default for ReferenceSet { #[cfg(test)] mod tests { use delta_kernel::engine::default::{executor::tokio::TokioBackgroundExecutor, DefaultEngine}; - use object_store::{memory::InMemory, path::Path}; + use object_store::memory::InMemory; use test_utils::{actions_to_string, actions_to_string_partitioned, add_commit, TestAction}; use super::*; From b81e478a87a7b062c8feeff8a145eb492b585035 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Wed, 19 Mar 2025 21:53:27 +0100 Subject: [PATCH 36/38] fix: pr feedback --- kernel/src/engine/default/filesystem.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 110788c439..7d99e04ecc 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -239,7 +239,7 @@ mod tests { let engine = DefaultEngine::new(store, Arc::new(TokioBackgroundExecutor::new())); let files: Vec<_> = engine .get_file_system_client() - .list_from(&table_root.join("_delta_log/0").unwrap()) + .list_from(&table_root.join("_delta_log").unwrap().join("0").unwrap()) .unwrap() .try_collect() .unwrap(); @@ -270,7 +270,7 @@ mod tests { let client = engine.get_file_system_client(); let files = client - .list_from(&url.join("_delta_log/0").unwrap()) + .list_from(&url.join("_delta_log").unwrap().join("0").unwrap()) .unwrap(); let mut len = 0; for (file, expected) in files.zip(expected_names.iter()) { From cdf814dabadd01db2a54d3c742f29ce570d09e70 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 20 Mar 2025 02:14:28 +0100 Subject: [PATCH 37/38] fix: pr feedback --- kernel/src/engine/default/filesystem.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index 
7d99e04ecc..c23f5133d0 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -53,7 +53,7 @@ impl FileSystemClient for ObjectStoreFileSystemClient { } else { let mut parts = offset.parts().collect_vec(); if parts.pop().is_none() { - return Err(Error::generic(format!( + return Err(Error::Generic(format!( "Offset path must not be a root directory. Got: '{}'", path.as_str() ))); From 79bd24d57d36dbf5d709d264dfa77fa0933d9a32 Mon Sep 17 00:00:00 2001 From: Robert Pack <42610831+roeap@users.noreply.github.com> Date: Thu, 20 Mar 2025 19:58:43 -0700 Subject: [PATCH 38/38] Update kernel/src/engine/default/filesystem.rs Co-authored-by: Zach Schuermann --- kernel/src/engine/default/filesystem.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/src/engine/default/filesystem.rs b/kernel/src/engine/default/filesystem.rs index c23f5133d0..21432ed70e 100644 --- a/kernel/src/engine/default/filesystem.rs +++ b/kernel/src/engine/default/filesystem.rs @@ -45,7 +45,7 @@ impl FileSystemClient for ObjectStoreFileSystemClient { path: &Url, ) -> DeltaResult>>> { // The offset is used for list-after; the prefix is used to restrict the listing to a specific directory. - // Unfortunately, `Path`` provides no easy way to check whether a name is directory-like, + // Unfortunately, `Path` provides no easy way to check whether a name is directory-like, // because it strips trailing /, so we're reduced to manually checking the original URL. let offset = Path::from_url_path(path.path())?; let prefix = if path.path().ends_with('/') {