From 27accd7119f6e97fadc57b6db78c9ec348de6c96 Mon Sep 17 00:00:00 2001
From: Robert Pack
Date: Fri, 17 Jan 2025 00:13:49 +0100
Subject: [PATCH] test: run some more dat tests

Signed-off-by: Robert Pack
---
NOTE(review): the line breaks and indentation of this patch were reconstructed
from a whitespace-collapsed copy; verify the YAML/TOML context lines against
the target tree before applying. crates/core/tests/dat.rs was revised during
review (shared helper, component-wise skip check), so its content no longer
corresponds to blob 82daf5c20e named in the index line.

 .../{load_dat => load-dat}/action.yaml |  6 +-
 .github/workflows/build.yml            | 17 ++--
 Cargo.toml                             | 14 +--
 crates/core/Cargo.toml                 |  6 +-
 crates/core/tests/dat.rs               | 94 ++++++++++++++++++
 5 files changed, 120 insertions(+), 17 deletions(-)
 rename .github/actions/{load_dat => load-dat}/action.yaml (81%)
 create mode 100644 crates/core/tests/dat.rs

diff --git a/.github/actions/load_dat/action.yaml b/.github/actions/load-dat/action.yaml
similarity index 81%
rename from .github/actions/load_dat/action.yaml
rename to .github/actions/load-dat/action.yaml
index 071db58ba0..6d40707b3c 100644
--- a/.github/actions/load_dat/action.yaml
+++ b/.github/actions/load-dat/action.yaml
@@ -19,8 +19,8 @@ runs:
     - name: load DAT
       shell: bash
       run: |
-        rm -rf {{ inputs.target-directory }}
+        rm -rf ${{ inputs.target-directory }}
         curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz
-        mkdir -p {{ inputs.target-directory }}
-        tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }}
+        mkdir -p ${{ inputs.target-directory }}
+        tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory ${{ inputs.target-directory }}
         rm deltalake-dat-v${{ inputs.version }}.tar.gz
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 578ae305ea..823d10ff0c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -20,7 +20,7 @@ jobs:
         uses: actions-rs/toolchain@v1
         with:
           profile: default
-          toolchain: '1.81'
+          toolchain: "1.81"
           override: true
 
       - name: Format
@@ -42,7 +42,7 @@ jobs:
         uses: actions-rs/toolchain@v1
         with:
           profile: default
-          toolchain: '1.81'
+          toolchain: "1.81"
           override: true
 
       - name: build and lint with clippy
@@ -79,9 +79,12 @@ jobs:
         uses: actions-rs/toolchain@v1
         with:
           profile: default
-          toolchain: '1.81'
+          toolchain: "1.81"
           override: true
 
+      - name: Load DAT data
+        uses: ./.github/actions/load-dat
+
       - name: Run tests
         run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }}
 
@@ -114,7 +117,7 @@ jobs:
         uses: actions-rs/toolchain@v1
         with:
           profile: default
-          toolchain: '1.81'
+          toolchain: "1.81"
           override: true
 
       # Install Java and Hadoop for HDFS integration tests
@@ -129,6 +132,9 @@ jobs:
           tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE
           echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH
 
+      - name: Load DAT data
+        uses: ./.github/actions/load-dat
+
       - name: Start emulated services
         run: docker compose up -d
 
@@ -160,7 +166,7 @@ jobs:
         uses: actions-rs/toolchain@v1
         with:
           profile: default
-          toolchain: '1.81'
+          toolchain: "1.81"
           override: true
 
       - name: Download Lakectl
@@ -175,4 +181,3 @@ jobs:
       - name: Run tests with rustls (default)
         run: |
           cargo test --features integration_test_lakefs,lakefs,datafusion
-
diff --git a/Cargo.toml b/Cargo.toml
index c1bc6ea502..c3e53c69af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,15 +26,15 @@ debug = true
 debug = "line-tables-only"
 
 [workspace.dependencies]
-#delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
-delta_kernel = { path = "../delta-kernel-rs/kernel", features = [
-    "default-engine",
-    "developer-visibility",
-] }
-# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [
+# delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
+# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [
 #     "default-engine",
 #     "developer-visibility",
 # ] }
+delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [
+    "default-engine",
+    "developer-visibility",
+] }
 
 # arrow
 arrow = { version = "53" }
@@ -48,7 +48,7 @@ arrow-ord = { version = "53" }
 arrow-row = { version = "53" }
 arrow-schema = { version = "53" }
 arrow-select = { version = "53" }
-object_store = { version = "0.11.2" , features = ["cloud"]}
+object_store = { version = "0.11.2", features = ["cloud"] }
 parquet = { version = "53" }
 
 # datafusion
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 532f041f68..619acd01b9 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -46,7 +46,7 @@ datafusion-functions-aggregate = { workspace = true, optional = true }
 # serde
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
-strum = { workspace = true}
+strum = { workspace = true }
 
 # "stdlib"
 bytes = { workspace = true }
@@ -132,3 +132,7 @@ datafusion = [
 datafusion-ext = ["datafusion"]
 json = ["parquet/json"]
 python = ["arrow/pyarrow"]
+
+[[test]]
+name = "dat"
+harness = false
diff --git a/crates/core/tests/dat.rs b/crates/core/tests/dat.rs
new file mode 100644
index 0000000000..82daf5c20e
--- /dev/null
+++ b/crates/core/tests/dat.rs
@@ -0,0 +1,94 @@
+//! Reader checks against the Delta Acceptance Tests (DAT) data.
+//!
+//! Every `test_case_info.json` matched by the `datatest_stable::harness!` call at
+//! the bottom of this file is passed to each registered reader function.
+
+use std::path::Path;
+use std::sync::Arc;
+
+use delta_kernel::Table;
+use deltalake_core::kernel::snapshot_next::{LazySnapshot, Snapshot};
+use deltalake_test::acceptance::read_dat_case;
+
+/// Names of DAT case directories that are not run.
+const SKIPPED_TESTS: &[&str] = &["iceberg_compat_v1"];
+
+/// Loads the DAT case owning `path` and checks the snapshot against its summary.
+///
+/// `path` is the matched `test_case_info.json`; its parent directory, prefixed
+/// with `CARGO_MANIFEST_DIR`, is handed to `read_dat_case` as the case root. The
+/// snapshot version and the protocol's reader/writer versions must equal the
+/// values in the case's table summary.
+///
+/// # Panics
+///
+/// Panics when the case cannot be loaded, the snapshot cannot be created, or a
+/// compared value differs.
+fn assert_snapshot_matches_summary(path: &Path) -> datatest_stable::Result<()> {
+    let case_dir = path
+        .parent()
+        .expect("test case info file has a parent directory");
+
+    // `Path::ends_with` compares whole components, so only a directory named
+    // exactly like a skipped case matches, never one that shares a suffix.
+    if let Some(skipped) = SKIPPED_TESTS.iter().find(|name| case_dir.ends_with(name)) {
+        println!("Skipping test: {}", skipped);
+        return Ok(());
+    }
+
+    let root_dir = format!(
+        "{}/{}",
+        env!("CARGO_MANIFEST_DIR"),
+        case_dir.to_str().expect("case directory is valid UTF-8")
+    );
+
+    tokio::runtime::Builder::new_current_thread()
+        .enable_all()
+        .build()?
+        .block_on(async {
+            let case = read_dat_case(root_dir).unwrap();
+
+            let table = Table::try_from_uri(case.table_root().unwrap()).expect("table");
+            let snapshot = LazySnapshot::try_new(
+                table,
+                Arc::new(object_store::local::LocalFileSystem::default()),
+                None,
+            )
+            .await
+            .unwrap();
+
+            let table_info = case.table_summary().expect("load summary");
+            assert_eq!(snapshot.version(), table_info.version);
+            assert_eq!(
+                (
+                    snapshot.protocol().min_reader_version(),
+                    snapshot.protocol().min_writer_version()
+                ),
+                (table_info.min_reader_version, table_info.min_writer_version)
+            );
+        });
+    Ok(())
+}
+
+/// Reader check that builds a [`LazySnapshot`] for the case.
+fn reader_test_lazy(path: &Path) -> datatest_stable::Result<()> {
+    assert_snapshot_matches_summary(path)
+}
+
+/// Second reader check registered with the harness.
+///
+/// NOTE(review): this runs the same `LazySnapshot` check as `reader_test_lazy`;
+/// the name suggests an eager snapshot was intended. Confirm and switch the
+/// snapshot type before relying on this for eager coverage.
+fn reader_test_eager(path: &Path) -> datatest_stable::Result<()> {
+    assert_snapshot_matches_summary(path)
+}
+
+datatest_stable::harness!(
+    reader_test_lazy,
+    "../../dat/out/reader_tests/generated/",
+    r"test_case_info\.json",
+    reader_test_eager,
+    "../../dat/out/reader_tests/generated/",
+    r"test_case_info\.json"
+);