From fafc776cf076432572a4ce161d6859551916676f Mon Sep 17 00:00:00 2001
From: "R. Tyler Croy"
Date: Fri, 10 Jan 2025 15:07:44 -0800
Subject: [PATCH 1/5] chore: expand the arrow version range to allow arrow v54 (#616)

This was released last week and in preliminary testing it appears safe to
incorporate for the latest version range

## What changes are proposed in this pull request?

## How was this change tested?

```
cargo test && cargo build && cargo test --features sync-engine
```

Signed-off-by: R. Tyler Croy
---
 Cargo.toml                    | 26 +++++++++++++++-----------
 acceptance/src/data.rs        |  6 +-----
 ffi/Cargo.toml                |  6 +++---
 kernel/tests/golden_tables.rs |  6 +-----
 4 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 220ab18c0..477bb5cd5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,17 +23,21 @@ rust-version = "1.80"
 version = "0.6.0"
 
 [workspace.dependencies]
-arrow = { version = ">=53, <54" }
-arrow-arith = { version = ">=53, <54" }
-arrow-array = { version = ">=53, <54" }
-arrow-buffer = { version = ">=53, <54" }
-arrow-cast = { version = ">=53, <54" }
-arrow-data = { version = ">=53, <54" }
-arrow-ord = { version = ">=53, <54" }
-arrow-json = { version = ">=53, <54" }
-arrow-select = { version = ">=53, <54" }
-arrow-schema = { version = ">=53, <54" }
-parquet = { version = ">=53, <54", features = ["object_store"] }
+# When changing the arrow version range, also modify ffi/Cargo.toml which has
+# its own arrow version ranges with modified features. Failure to do so will
+# result in compilation errors as two different sets of arrow dependencies may
+# be sourced
+arrow = { version = ">=53, <55" }
+arrow-arith = { version = ">=53, <55" }
+arrow-array = { version = ">=53, <55" }
+arrow-buffer = { version = ">=53, <55" }
+arrow-cast = { version = ">=53, <55" }
+arrow-data = { version = ">=53, <55" }
+arrow-ord = { version = ">=53, <55" }
+arrow-json = { version = ">=53, <55" }
+arrow-select = { version = ">=53, <55" }
+arrow-schema = { version = ">=53, <55" }
+parquet = { version = ">=53, <55", features = ["object_store"] }
 object_store = { version = ">=0.11, <0.12" }
 hdfs-native-object-store = "0.12.0"
 hdfs-native = "0.10.0"

diff --git a/acceptance/src/data.rs b/acceptance/src/data.rs
index 9832ac8a4..c515d50c9 100644
--- a/acceptance/src/data.rs
+++ b/acceptance/src/data.rs
@@ -61,7 +61,7 @@ pub fn sort_record_batch(batch: RecordBatch) -> DeltaResult {
     Ok(RecordBatch::try_new(batch.schema(), columns)?)
 }
 
-// Ensure that two schema have the same field names, and dict_id/ordering.
+// Ensure that two schema have the same field names, and dict_is_ordered
 // We ignore:
 // - data type: This is checked already in `assert_columns_match`
 // - nullability: parquet marks many things as nullable that we don't in our schema
@@ -72,10 +72,6 @@ fn assert_schema_fields_match(schema: &Schema, golden: &Schema) {
         schema_field.name() == golden_field.name(),
         "Field names don't match"
     );
-    assert!(
-        schema_field.dict_id() == golden_field.dict_id(),
-        "Field dict_id doesn't match"
-    );
     assert!(
         schema_field.dict_is_ordered() == golden_field.dict_is_ordered(),
         "Field dict_is_ordered doesn't match"

diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml
index 08162a505..25897d965 100644
--- a/ffi/Cargo.toml
+++ b/ffi/Cargo.toml
@@ -24,13 +24,13 @@ delta_kernel = { path = "../kernel", default-features = false, features = [
 delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.0" }
 
 # used if we use the default engine to be able to move arrow data into the c-ffi format
-arrow-schema = { version = "53.0", default-features = false, features = [
+arrow-schema = { version = ">=53, <55", default-features = false, features = [
   "ffi",
 ], optional = true }
-arrow-data = { version = "53.0", default-features = false, features = [
+arrow-data = { version = ">=53, <55", default-features = false, features = [
   "ffi",
 ], optional = true }
-arrow-array = { version = "53.0", default-features = false, optional = true }
+arrow-array = { version = ">=53, <55", default-features = false, optional = true }
 
 [build-dependencies]
 cbindgen = "0.27.0"

diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs
index 1d0c8406b..cd9023db1 100644
--- a/kernel/tests/golden_tables.rs
+++ b/kernel/tests/golden_tables.rs
@@ -89,7 +89,7 @@ fn sort_record_batch(batch: RecordBatch) -> DeltaResult {
     Ok(RecordBatch::try_new(batch.schema(), columns)?)
 }
 
-// Ensure that two sets of fields have the same names, and dict_id/ordering.
+// Ensure that two sets of fields have the same names, and dict_is_ordered
 // We ignore:
 // - data type: This is checked already in `assert_columns_match`
 // - nullability: parquet marks many things as nullable that we don't in our schema
@@ -103,10 +103,6 @@ fn assert_fields_match<'a>(
         actual_field.name() == expected_field.name(),
         "Field names don't match"
     );
-    assert!(
-        actual_field.dict_id() == expected_field.dict_id(),
-        "Field dict_id doesn't match"
-    );
     assert!(
         actual_field.dict_is_ordered() == expected_field.dict_is_ordered(),
         "Field dict_is_ordered doesn't match"
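A note on consuming the widened range: an engine that must stay on one particular arrow release can still pin the version that resolves in its own lockfile, as long as it falls inside `>=53, <55`. A minimal sketch, run in the consumer's workspace (the crate names and the `54.0.0` version below are illustrative, not part of this patch):

```
# Pin the arrow crates recorded in Cargo.lock to a single release that the
# >=53, <55 range accepted by delta_kernel also allows.
cargo update -p arrow --precise 54.0.0
cargo update -p arrow-schema --precise 54.0.0
cargo update -p parquet --precise 54.0.0
```

The `[patch.crates-io]` approach quoted from the README later in this series is the heavier alternative for cases where an exact-version override is required.
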
From 0816cebb40c1d0aee44a4a470d067c67a20c66b5 Mon Sep 17 00:00:00 2001
From: Zach Schuermann
Date: Fri, 10 Jan 2025 15:44:17 -0800
Subject: [PATCH 2/5] add release script (#636)

add release script to start automating our releases with `cargo-release` and
`git-cliff`. the main release script is `./release.sh` with rather
straightforward usage. we use `cargo-release` to handle some of the
`Cargo.toml` updates and trigger git-cliff, but otherwise the script just
manually uses cargo/git to publish the release and tag it

the release flow will be the following:
1. from an up-to-date `main` branch, checkout a new release branch (like
   `prepare 0.x.y`)
2. run `./release.sh 0.x.y` to do the 'prepare' phase of generating a
   changelog, updating readme etc.
   a. the tool will validate that you're on a release branch, generate the
      changelog, and make a commit and (optionally) open a PR (cargo-release
      is used to update the versions correctly and git-cliff is used as a
      hook in cargo-release to generate the changelog)
   b. then the only main action is just making minor CHANGELOG changes and
      updating + merging the PR
3. merge the 'prepare' PR
4. from `main` branch with prepare branch merged, run: `./release.sh`
   a. the tool will publish any unpublished version (e.g. if 0.6.0 is latest
      on crates.io and the prepare PR bumps to 0.6.1 then the tool will do
      the release for `delta_kernel` and `delta_kernel_derive`, add a git
      tag, and push the tag)
   b. NOTE: this is currently done manually but could leverage cargo-release
      in the future
---
 .github/pull_request_template.md              |  13 ++
 acceptance/Cargo.toml                         |   3 +
 cliff.toml                                    |  70 ++++++++
 feature-tests/Cargo.toml                      |   3 +
 ffi-proc-macros/Cargo.toml                    |   3 +
 ffi/Cargo.toml                                |   3 +
 kernel/Cargo.toml                             |   7 +
 kernel/examples/inspect-table/Cargo.toml      |   3 +
 kernel/examples/read-table-changes/Cargo.toml |   3 +
 .../read-table-multi-threaded/Cargo.toml      |   3 +
 .../read-table-single-threaded/Cargo.toml     |   3 +
 release.sh                                    | 170 ++++++++++++++++++
 release.toml                                  |   3 +
 test-utils/Cargo.toml                         |   3 +
 14 files changed, 290 insertions(+)
 create mode 100644 cliff.toml
 create mode 100755 release.sh
 create mode 100644 release.toml

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 5241cad3f..ec2fd0d91 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -7,6 +7,19 @@ Thanks for sending a pull request! Here are some tips for you:
   5. Be sure to keep the PR description updated to reflect all changes.
 -->
+
+
 ## What changes are proposed in this pull request?
🏗️ Breaking changes" },
+  { message = "^feat", group = "🚀 Features / new APIs" },
+  { message = "^fix", group = "🐛 Bug Fixes" },
+  { message = "^doc", group = "📚 Documentation" },
+  { message = "^perf", group = "⚡ Performance" },
+  { message = "^refactor", group = "🚜 Refactor" },
+  { message = "^test", group = "🧪 Testing" },
+  { message = "^chore|^ci", group = "⚙️ Chores/CI" },
+  { message = "^revert", group = "◀️ Revert" },
+  { message = ".*", group = "Other" },
+]
+# filter out the commits that are not matched by commit parsers
+filter_commits = false
+# sort the tags topologically
+topo_order = false
+# sort the commits inside sections by oldest/newest order
+sort_commits = "oldest"

diff --git a/feature-tests/Cargo.toml b/feature-tests/Cargo.toml
index 6f9827e83..7e45e41e2 100644
--- a/feature-tests/Cargo.toml
+++ b/feature-tests/Cargo.toml
@@ -8,6 +8,9 @@ repository.workspace = true
 readme.workspace = true
 version.workspace = true
 
+[package.metadata.release]
+release = false
+
 [dependencies]
 delta_kernel = { path = "../kernel" }

diff --git a/ffi-proc-macros/Cargo.toml b/ffi-proc-macros/Cargo.toml
index ce8f1936b..326462247 100644
--- a/ffi-proc-macros/Cargo.toml
+++ b/ffi-proc-macros/Cargo.toml
@@ -10,6 +10,9 @@ readme.workspace = true
 rust-version.workspace = true
 version.workspace = true
 
+[package.metadata.release]
+release = false
+
 [lib]
 proc-macro = true

diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml
index 25897d965..d0904c24f 100644
--- a/ffi/Cargo.toml
+++ b/ffi/Cargo.toml
@@ -10,6 +10,9 @@ version.workspace = true
 rust-version.workspace = true
 build = "build.rs"
 
+[package.metadata.release]
+release = false
+
 [lib]
 crate-type = ["lib", "cdylib", "staticlib"]

diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index 459c49177..e56608a33 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -15,6 +15,13 @@ rust-version.workspace = true
 [package.metadata.docs.rs]
 all-features = true
 
+[package.metadata.release]
+pre-release-replacements = [
+  {file="../README.md", search="delta_kernel = \"[a-z0-9\\.-]+\"", replace="delta_kernel = \"{{version}}\""},
+  {file="../README.md", search="version = \"[a-z0-9\\.-]+\"", replace="version = \"{{version}}\""},
+]
+pre-release-hook = ["git", "cliff", "--repository", "../", "--config", "../cliff.toml", "--unreleased", "--prepend", "../CHANGELOG.md", "--tag", "{{version}}" ]
+
 [dependencies]
 bytes = "1.7"
 chrono = { version = "0.4" }

diff --git a/kernel/examples/inspect-table/Cargo.toml b/kernel/examples/inspect-table/Cargo.toml
index 174f84100..b81a8ac5b 100644
--- a/kernel/examples/inspect-table/Cargo.toml
+++ b/kernel/examples/inspect-table/Cargo.toml
@@ -15,3 +15,6 @@ delta_kernel = { path = "../../../kernel", features = [
 ] }
 env_logger = "0.11.3"
 url = "2"
+
+[package.metadata.release]
+release = false

diff --git a/kernel/examples/read-table-changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml
index f9f980dc2..181da7dc6 100644
--- a/kernel/examples/read-table-changes/Cargo.toml
+++ b/kernel/examples/read-table-changes/Cargo.toml
@@ -4,6 +4,9 @@ version = "0.1.0"
 edition = "2021"
 publish = false
 
+[package.metadata.release]
+release = false
+
 [dependencies]
 arrow-array = { workspace = true }
 arrow-schema = { workspace = true }

diff --git a/kernel/examples/read-table-multi-threaded/Cargo.toml b/kernel/examples/read-table-multi-threaded/Cargo.toml
index 178435a38..3362e579a 100644
--- a/kernel/examples/read-table-multi-threaded/Cargo.toml
+++ b/kernel/examples/read-table-multi-threaded/Cargo.toml
@@ -17,3 +17,6 @@ env_logger = "0.11.5"
 itertools = "0.13"
 spmc = "0.3.0"
 url = "2"
+
+[package.metadata.release]
+release = false

diff --git a/kernel/examples/read-table-single-threaded/Cargo.toml b/kernel/examples/read-table-single-threaded/Cargo.toml
index 6e0dc147a..dc0458139 100644
--- a/kernel/examples/read-table-single-threaded/Cargo.toml
+++ b/kernel/examples/read-table-single-threaded/Cargo.toml
@@ -16,3 +16,6 @@ delta_kernel = { path = "../../../kernel", features = [
 env_logger = "0.11.5"
 itertools = "0.13"
 url = "2"
+
+[package.metadata.release]
+release = false

diff --git a/release.sh b/release.sh
new file mode 100755
index 000000000..7aa086742
--- /dev/null
+++ b/release.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+
+###################################################################################################
+# USAGE:
+# 1. on a release branch: ./release.sh <version>
+# 2. on main branch (after merging release branch): ./release.sh
+###################################################################################################
+
+# This is a script to automate a large portion of the release process for the crates we publish to
+# crates.io. Currently only `delta_kernel` (in the kernel/ dir) and `delta_kernel_derive` (in the
+# derive-macros/ dir) are released.
+
+# Exit on error, undefined variables, and pipe failures
+set -euo pipefail
+
+# print commands before executing them for debugging
+# set -x
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # no color
+
+log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
+log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
+log_warning() { echo -e "${YELLOW}[WARNING]${NC} $1"; }
+log_error() { echo -e "${RED}[ERROR]${NC} $1"; exit 1; }
+
+check_requirements() {
+    log_info "Checking required tools..."
+
+    command -v cargo >/dev/null 2>&1 || log_error "cargo is required but not installed"
+    command -v git >/dev/null 2>&1 || log_error "git is required but not installed"
+    command -v cargo-release >/dev/null 2>&1 || log_error "cargo-release is required but not installed. Install with: cargo install cargo-release"
+    command -v git-cliff >/dev/null 2>&1 || log_error "git-cliff is required but not installed. Install with: cargo install git-cliff"
+    command -v jq >/dev/null 2>&1 || log_error "jq is required but not installed."
+
+    log_success "All required tools are available"
+}
+
+is_main_branch() {
+    local current_branch
+    current_branch=$(git rev-parse --abbrev-ref HEAD)
+    [[ "$current_branch" == "main" ]]
+}
+
+is_working_tree_clean() {
+    git diff --quiet && git diff --cached --quiet
+}
+
+# check if the version is already published on crates.io
+is_version_published() {
+    local crate_name="$1"
+    local version
+    version=get_current_version "$crate_name"
+
+    if [[ -z "$version" ]]; then
+        log_error "Could not find crate '$crate_name' in workspace"
+    fi
+
+    if cargo search "$crate_name" | grep -q "^$crate_name = \"$version\""; then
+        return 0
+    else
+        return 1
+    fi
+}
+
+# get current version from Cargo.toml
+get_current_version() {
+    local crate_name="$1"
+    cargo metadata --no-deps --format-version 1 | \
+        jq -r --arg name "$crate_name" '.packages[] | select(.name == $name) | .version'
+}
+
+# Prompt user for confirmation
+confirm() {
+    local prompt="$1"
+    local response
+
+    echo -e -n "${YELLOW}${prompt} [y/N]${NC} "
+    read -r response
+
+    [[ "$response" =~ ^[Yy] ]]
+}
+
+# handle release branch workflow (CHANGELOG updates, README updates, PR to main)
+handle_release_branch() {
+    local version="$1"
+
+    log_info "Starting release preparation for version $version..."
+
+    # Update CHANGELOG and README
+    log_info "Updating CHANGELOG.md and README.md..."
+    if ! cargo release --workspace "$version" --no-publish --no-push --no-tag --execute; then
+        log_error "Failed to update CHANGELOG and README"
+    fi
+
+    if confirm "Print diff of CHANGELOG/README changes?"; then
+        git diff --stat HEAD^
+        git diff HEAD^
+    fi
+
+    if confirm "Would you like to push these changes to 'origin' remote?"; then
+        local current_branch
+        current_branch=$(git rev-parse --abbrev-ref HEAD)
+
+        log_info "Pushing changes to remote..."
+        git push origin "$current_branch"
+
+        if confirm "Would you like to create a PR to merge this release into 'main'?"; then
+            if command -v gh >/dev/null 2>&1; then
+                gh pr create --title "release $version" --body "release $version"
+                log_success "PR created successfully"
+            else
+                log_warning "GitHub CLI not found. Please create a PR manually."
+            fi
+        fi
+    fi
+}
+
+# Handle main branch workflow (publish and tag)
+handle_main_branch() {
+    # could potentially just use full 'cargo release' command here
+    publish "delta_kernel_derive" "$current_version"
+    publish "delta_kernel" "$current_version"
+
+    if confirm "Would you like to tag this release?"; then
+        log_info "Tagging release $current_version..."
+        git tag -a "v$current_version" -m "Release $current_version"
+        git push upstream "v$current_version"
+        log_success "Tagged release $current_version"
+    fi
+}
+
+publish() {
+    local crate_name="$1"
+    local current_version
+    current_version=$(get_current_version "$crate_name")
+
+    if is_version_published "delta_kernel_derive"; then
+        log_error "delta_kernel_derive version $current_version is already published to crates.io"
+    fi
+    log_info "[DRY RUN] Publishing $crate_name version $version to crates.io..."
+    if ! cargo publish --dry-run -p "$crate_name"; then
+        log_error "Failed to publish $crate_name to crates.io"
+    fi
+
+    if confirm "Dry run complete. Continue with publishing?"; then
+        log_info "Publishing $crate_name version $version to crates.io..."
+        if ! cargo publish -p "$crate_name"; then
+            log_error "Failed to publish $crate_name to crates.io"
+        fi
+        log_success "Successfully published $crate_name version $version to crates.io"
+    fi
+}
+
+check_requirements
+
+if is_main_branch; then
+    if [[ $# -ne 0 ]]; then
+        log_error "Version argument not expected on main branch\nUsage: $0"
+    fi
+    handle_main_branch
+else
+    if [[ $# -ne 1 ]]; then
+        log_error "Version argument required when on release branch\nUsage: $0 <version>"
+    fi
+    handle_release_branch "$1"
+fi

diff --git a/release.toml b/release.toml
new file mode 100644
index 000000000..b04bd6b47
--- /dev/null
+++ b/release.toml
@@ -0,0 +1,3 @@
+tag = false
+publish = false
+pre-release-commit-message = "release {{version}}"

diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml
index 1bc5e41f8..0a90e96ed 100644
--- a/test-utils/Cargo.toml
+++ b/test-utils/Cargo.toml
@@ -8,6 +8,9 @@ repository.workspace = true
 readme.workspace = true
 version.workspace = true
 
+[package.metadata.release]
+release = false
+
 [dependencies]
 arrow-array = { workspace = true, features = ["chrono-tz"] }
 arrow-schema = { workspace = true }
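Concretely, the two-phase flow described in the commit message above looks like the following hypothetical session; the branch name and version number are illustrative:

```
# phase 1: prepare, on a release branch cut from an up-to-date main
git checkout main && git pull
git checkout -b prepare-0.6.1
./release.sh 0.6.1   # cargo-release bumps versions, git-cliff prepends the
                     # CHANGELOG, and the script offers to push and open a PR

# ... review the generated CHANGELOG, then merge the 'prepare' PR ...

# phase 2: publish and tag, from main with the prepare PR merged
git checkout main && git pull
./release.sh         # publishes delta_kernel_derive and delta_kernel, then tags
```
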
From e6aefda96a620ae4b4fb2752afc20e175322a392 Mon Sep 17 00:00:00 2001
From: Zach Schuermann
Date: Fri, 10 Jan 2025 16:17:44 -0800
Subject: [PATCH 3/5] release 0.6.1 (#637)

release 0.6.1
---
 CHANGELOG.md      | 34 ++++++++++++++++++++++++++++++++++
 Cargo.toml        |  2 +-
 README.md         |  4 ++--
 cliff.toml        |  4 ++--
 ffi/Cargo.toml    |  2 +-
 kernel/Cargo.toml |  2 +-
 release.sh        | 12 +++++++++++-
 7 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8e4f38f7..4d403a1fd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,39 @@
 # Changelog
 
+## [v0.6.1](https://github.com/delta-io/delta-kernel-rs/tree/v0.6.1/) (2025-01-10)
+
+[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.6.0...v0.6.1)
+
+
+### 🚀 Features / new APIs
+
+1. New feature flag `default-engine-rustls` ([#572])
+
+### 🐛 Bug Fixes
+
+1. Allow partition value timestamp to be ISO8601 formatted string ([#622])
+2. Fix stderr output for handle tests ([#630])
+
+### ⚙️ Chores/CI
+
+1. Expand the arrow version range to allow arrow v54 ([#616])
+2. Update to CodeCov @v5 ([#608])
+
+### Other
+
+1. Fix msrv check by pinning `home` dependency ([#605])
+2. Add release script ([#636])
+
+
+[#605]: https://github.com/delta-io/delta-kernel-rs/pull/605
+[#608]: https://github.com/delta-io/delta-kernel-rs/pull/608
+[#622]: https://github.com/delta-io/delta-kernel-rs/pull/622
+[#630]: https://github.com/delta-io/delta-kernel-rs/pull/630
+[#572]: https://github.com/delta-io/delta-kernel-rs/pull/572
+[#616]: https://github.com/delta-io/delta-kernel-rs/pull/616
+[#636]: https://github.com/delta-io/delta-kernel-rs/pull/636
+
+
 ## [v0.6.0](https://github.com/delta-io/delta-kernel-rs/tree/v0.6.0/) (2024-12-17)
 
 [Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/v0.5.0...v0.6.0)

diff --git a/Cargo.toml b/Cargo.toml
index 477bb5cd5..ec7993736 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,7 +20,7 @@ license = "Apache-2.0"
 repository = "https://github.com/delta-io/delta-kernel-rs"
 readme = "README.md"
 rust-version = "1.80"
-version = "0.6.0"
+version = "0.6.1"
 
 [workspace.dependencies]
 # When changing the arrow version range, also modify ffi/Cargo.toml which has

diff --git a/README.md b/README.md
index 2f5565d8f..46ec1b10f 100644
--- a/README.md
+++ b/README.md
@@ -43,10 +43,10 @@ consumer's own `Engine` trait, the kernel has a feature flag to enable a default
 ```toml
 # fewer dependencies, requires consumer to implement Engine trait.
 # allows consumers to implement their own in-memory format
-delta_kernel = "0.6"
+delta_kernel = "0.6.1"
 
 # or turn on the default engine, based on arrow
-delta_kernel = { version = "0.6", features = ["default-engine"] }
+delta_kernel = { version = "0.6.1", features = ["default-engine"] }
 ```
 
 ### Feature flags

diff --git a/cliff.toml b/cliff.toml
index f629cc3fd..cf8ae7a63 100644
--- a/cliff.toml
+++ b/cliff.toml
@@ -6,9 +6,9 @@ header = """
 """
 # Tera template
 body = """
-## [{{ version }}](https://github.com/delta-io/delta-kernel-rs/tree/{{ version }}/) ({{ timestamp | date(format="%Y-%m-%d") }})
+## [v{{ version }}](https://github.com/delta-io/delta-kernel-rs/tree/v{{ version }}/) ({{ timestamp | date(format="%Y-%m-%d") }})
 
-[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/{{ previous.version }}...{{ version }})
+[Full Changelog](https://github.com/delta-io/delta-kernel-rs/compare/{{ previous.version }}...v{{ version }})
 
 {% for group, commits in commits | group_by(attribute="group") %}
 ### {{ group | striptags | trim | upper_first }}

diff --git a/ffi/Cargo.toml b/ffi/Cargo.toml
index d0904c24f..aa4edc167 100644
--- a/ffi/Cargo.toml
+++ b/ffi/Cargo.toml
@@ -24,7 +24,7 @@ url = "2"
 delta_kernel = { path = "../kernel", default-features = false, features = [
   "developer-visibility",
 ] }
-delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.0" }
+delta_kernel_ffi_macros = { path = "../ffi-proc-macros", version = "0.6.1" }
 
 # used if we use the default engine to be able to move arrow data into the c-ffi format
 arrow-schema = { version = ">=53, <55", default-features = false, features = [

diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml
index e56608a33..a045153cf 100644
--- a/kernel/Cargo.toml
+++ b/kernel/Cargo.toml
@@ -39,7 +39,7 @@ uuid = "1.10.0"
 z85 = "3.0.5"
 
 # bring in our derive macros
-delta_kernel_derive = { path = "../derive-macros", version = "0.6.0" }
+delta_kernel_derive = { path = "../derive-macros", version = "0.6.1" }
 
 # used for developer-visibility
 visibility = "0.1.1"

diff --git a/release.sh b/release.sh
index 7aa086742..640fb4faa 100755
--- a/release.sh
+++ b/release.sh
@@ -2,7 +2,7 @@
 
 ###################################################################################################
 # USAGE:
-# 1. on a release branch: ./release.sh <version>
+# 1. on a release branch: ./release.sh <version> (example: ./release.sh 0.1.0)
 # 2. on main branch (after merging release branch): ./release.sh
 ###################################################################################################
@@ -155,6 +155,15 @@ publish() {
     fi
 }
 
+
+validate_version() {
+    local version=$1
+    # Check if version starts with a number
+    if [[ ! $version =~ ^[0-9] ]]; then
+        log_error "Version must start with a number (e.g., '0.1.1'). Got: '$version'"
+    fi
+}
+
 check_requirements
 
 if is_main_branch; then
@@ -166,5 +175,6 @@ else
     if [[ $# -ne 1 ]]; then
         log_error "Version argument required when on release branch\nUsage: $0 <version>"
     fi
+    validate_version "$1"
     handle_release_branch "$1"
 fi

From 350d5b26cfa24b3e8f27355259724ebe7836f100 Mon Sep 17 00:00:00 2001
From: Zach Schuermann
Date: Mon, 13 Jan 2025 11:42:11 -0800
Subject: [PATCH 4/5] fix: release script publishing fixes (#638)

release script publishing fixes
---
 release.sh | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/release.sh b/release.sh
index 640fb4faa..b16605c9d 100755
--- a/release.sh
+++ b/release.sh
@@ -53,7 +53,7 @@ is_working_tree_clean() {
 is_version_published() {
     local crate_name="$1"
     local version
-    version=get_current_version "$crate_name"
+    version=$(get_current_version "$crate_name")
 
     if [[ -z "$version" ]]; then
         log_error "Could not find crate '$crate_name' in workspace"
@@ -122,14 +122,20 @@ handle_release_branch() {
 # Handle main branch workflow (publish and tag)
 handle_main_branch() {
     # could potentially just use full 'cargo release' command here
-    publish "delta_kernel_derive" "$current_version"
-    publish "delta_kernel" "$current_version"
+    publish "delta_kernel_derive"
+    publish "delta_kernel"
+
+    # hack: just redo getting the version
+    local version
+    version=$(get_current_version "delta_kernel")
 
     if confirm "Would you like to tag this release?"; then
-        log_info "Tagging release $current_version..."
-        git tag -a "v$current_version" -m "Release $current_version"
-        git push upstream "v$current_version"
-        log_success "Tagged release $current_version"
+        log_info "Tagging release $version..."
+        if confirm "Tagging as v$version. continue?"; then
+            git tag -a "v$version" -m "Release v$version"
+            git push upstream tag "v$version"
+            log_success "Tagged release $version"
+        fi
     fi
 }
 
@@ -138,20 +144,20 @@ publish() {
     local crate_name="$1"
     local current_version
     current_version=$(get_current_version "$crate_name")
 
-    if is_version_published "delta_kernel_derive"; then
-        log_error "delta_kernel_derive version $current_version is already published to crates.io"
+    if is_version_published "$crate_name"; then
+        log_error "$crate_name version $current_version is already published to crates.io"
     fi
-    log_info "[DRY RUN] Publishing $crate_name version $version to crates.io..."
+    log_info "[DRY RUN] Publishing $crate_name version $current_version to crates.io..."
     if ! cargo publish --dry-run -p "$crate_name"; then
         log_error "Failed to publish $crate_name to crates.io"
     fi
 
     if confirm "Dry run complete. Continue with publishing?"; then
-        log_info "Publishing $crate_name version $version to crates.io..."
+        log_info "Publishing $crate_name version $current_version to crates.io..."
         if ! cargo publish -p "$crate_name"; then
             log_error "Failed to publish $crate_name to crates.io"
         fi
-        log_success "Successfully published $crate_name version $version to crates.io"
+        log_success "Successfully published $crate_name version $current_version to crates.io"
     fi
 }
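The headline fix in this patch is a subtle piece of bash: `version=get_current_version "$crate_name"` never calls the function. It instead tries to run the value of `$crate_name` as a command, with `version` set only in that command's environment. A minimal repro of the broken and fixed forms (a standalone sketch, not part of the patch):

```
get_current_version() { echo "0.6.1"; }

# broken: attempts to execute "delta_kernel" as a command, with the variable
# version=get_current_version exported into that command's environment only
version=get_current_version "delta_kernel"

# fixed: command substitution invokes the function and captures its stdout
version=$(get_current_version "delta_kernel")
echo "$version"   # prints 0.6.1
```

In the script this surfaced as `is_version_published` never seeing a real version string, since the environment-only assignment vanishes as soon as the command returns.
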
From 12020d81a510433a1e78241b7c77c99529c77b32 Mon Sep 17 00:00:00 2001
From: Robert Pack <42610831+roeap@users.noreply.github.com>
Date: Tue, 14 Jan 2025 17:42:21 +0100
Subject: [PATCH 5/5] chore: fix some typos (#643)

## What changes are proposed in this pull request?
Recently installed a spell checker in my IDE and thought I'd fix the findings ...

## How was this change tested?
current tests, no changes to code / logic.

Signed-off-by: Robert Pack
---
 .gitignore                                             |  1 +
 README.md                                              | 10 +++++-----
 ffi/examples/read-table/arrow.c                        |  4 ++--
 ffi/examples/read-table/read_table.c                   |  2 +-
 ffi/src/engine_funcs.rs                                |  2 +-
 ffi/src/expressions/kernel.rs                          |  2 +-
 ffi/src/handle.rs                                      |  4 ++--
 ffi/src/scan.rs                                        |  2 +-
 ffi/src/test_ffi.rs                                    |  2 +-
 integration-tests/src/main.rs                          |  4 ++--
 kernel/examples/inspect-table/src/main.rs              |  2 +-
 kernel/examples/read-table-multi-threaded/src/main.rs  |  6 +++---
 kernel/examples/read-table-single-threaded/src/main.rs |  2 +-
 kernel/src/actions/visitors.rs                         |  2 +-
 kernel/src/engine/arrow_utils.rs                       |  6 +++---
 kernel/src/engine/default/json.rs                      |  2 +-
 kernel/src/engine/default/parquet.rs                   |  8 ++++----
 kernel/src/engine/sync/parquet.rs                      |  5 ++---
 kernel/src/engine_data.rs                              |  2 +-
 kernel/src/error.rs                                    |  6 +++---
 kernel/src/expressions/mod.rs                          |  2 +-
 kernel/src/expressions/scalars.rs                      |  2 +-
 kernel/src/lib.rs                                      |  4 ++--
 kernel/src/predicates/parquet_stats_skipping/tests.rs  |  4 ++--
 kernel/src/predicates/tests.rs                         |  2 +-
 kernel/src/scan/data_skipping.rs                       |  2 +-
 kernel/src/schema.rs                                   |  2 +-
 kernel/src/transaction.rs                              |  2 +-
 kernel/tests/golden_tables.rs                          |  8 ++++----
 29 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/.gitignore b/.gitignore
index a7faecad9..11bf875df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 .idea/
 .vscode/
 .vim
+.zed
 
 # Rust
 .cargo/

diff --git a/README.md b/README.md
index 46ec1b10f..6e25a2ddb 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Delta-kernel-rs is split into a few different crates:
 - kernel: The actual core kernel crate
 - acceptance: Acceptance tests that validate correctness via the [Delta Acceptance Tests][dat]
 - derive-macros: A crate for our [derive-macros] to live in
-- ffi: Functionallity that enables delta-kernel-rs to be used from `C` or `C++` See the [ffi](ffi)
+- ffi: Functionality that enables delta-kernel-rs to be used from `C` or `C++` See the [ffi](ffi)
   directory for more information.
 
 ## Building
@@ -66,12 +66,12 @@ are still unstable. We therefore may break APIs within minor releases (that is,
 we will not break APIs in patch releases (`0.1.0` -> `0.1.1`).
 
 ## Arrow versioning
-If you enable the `default-engine` or `sync-engine` features, you get an implemenation of the
+If you enable the `default-engine` or `sync-engine` features, you get an implementation of the
 `Engine` trait that uses [Arrow] as its data format.
 
 The [`arrow crate`](https://docs.rs/arrow/latest/arrow/) tends to release new major versions
 rather quickly. To enable engines that already integrate arrow to also integrate kernel and not force them
-to track a specific version of arrow that kernel depends on, we take as broad dependecy on arrow
+to track a specific version of arrow that kernel depends on, we take as broad dependency on arrow
 versions as we can.
 
 This means you can force kernel to rely on the specific arrow version that your engine already uses,
@@ -96,7 +96,7 @@ arrow-schema = "53.0"
 parquet = "53.0"
 ```
 
-Note that unfortunatly patching in `cargo` requires that _exactly one_ version matches your
+Note that unfortunately patching in `cargo` requires that _exactly one_ version matches your
 specification. If only arrow "53.0.0" had been released the above will work, but if "53.0.1" where
 to be released, the specification will break and you will need to provide a more restrictive
 specification like `"=53.0.0"`.
@@ -111,7 +111,7 @@ and then checking what version of `object_store` it depends on.
 ## Documentation
 
 - [API Docs](https://docs.rs/delta_kernel/latest/delta_kernel/)
-- [arcitecture.md](doc/architecture.md) document describing the kernel architecture (currently wip)
+- [architecture.md](doc/architecture.md) document describing the kernel architecture (currently wip)
 
 ## Examples

diff --git a/ffi/examples/read-table/arrow.c b/ffi/examples/read-table/arrow.c
index d58a2fa2d..7eb32b7c3 100644
--- a/ffi/examples/read-table/arrow.c
+++ b/ffi/examples/read-table/arrow.c
@@ -97,7 +97,7 @@ static GArrowRecordBatch* add_partition_columns(
   }
 
   GArrowArray* partition_col = garrow_array_builder_finish((GArrowArrayBuilder*)builder, &error);
-  if (report_g_error("Can't build string array for parition column", error)) {
+  if (report_g_error("Can't build string array for partition column", error)) {
     printf("Giving up on column %s\n", col);
     g_error_free(error);
     g_object_unref(builder);
@@ -144,7 +144,7 @@ static void add_batch_to_context(
   }
   record_batch = add_partition_columns(record_batch, partition_cols, partition_values);
   if (record_batch == NULL) {
-    printf("Failed to add parition columns, not adding batch\n");
+    printf("Failed to add partition columns, not adding batch\n");
     return;
   }
   context->batches = g_list_append(context->batches, record_batch);

diff --git a/ffi/examples/read-table/read_table.c b/ffi/examples/read-table/read_table.c
index 0aa8caa41..7b1a7f2c7 100644
--- a/ffi/examples/read-table/read_table.c
+++ b/ffi/examples/read-table/read_table.c
@@ -43,7 +43,7 @@ void print_partition_info(struct EngineContext* context, const CStringMap* parti
 }
 
 // Kernel will call this function for each file that should be scanned. The arguments include enough
-// context to constuct the correct logical data from the physically read parquet
+// context to construct the correct logical data from the physically read parquet
 void scan_row_callback(
   void* engine_context,
   KernelStringSlice path,

diff --git a/ffi/src/engine_funcs.rs b/ffi/src/engine_funcs.rs
index f8534dfc0..1afb60510 100644
--- a/ffi/src/engine_funcs.rs
+++ b/ffi/src/engine_funcs.rs
@@ -42,7 +42,7 @@ impl Drop for FileReadResultIterator {
     }
 }
 
-/// Call the engine back with the next `EngingeData` batch read by Parquet/Json handler. The
+/// Call the engine back with the next `EngineData` batch read by Parquet/Json handler. The
 /// _engine_ "owns" the data that is passed into the `engine_visitor`, since it is allocated by the
 /// `Engine` being used for log-replay. If the engine wants the kernel to free this data, it _must_
 /// call [`free_engine_data`] on it.

diff --git a/ffi/src/expressions/kernel.rs b/ffi/src/expressions/kernel.rs
index f2ed8b1a3..a5116db47 100644
--- a/ffi/src/expressions/kernel.rs
+++ b/ffi/src/expressions/kernel.rs
@@ -83,7 +83,7 @@ pub struct EngineExpressionVisitor {
     /// Visit a 64bit timestamp belonging to the list identified by `sibling_list_id`.
     /// The timestamp is microsecond precision with no timezone.
     pub visit_literal_timestamp_ntz: VisitLiteralFn,
-    /// Visit a 32bit intger `date` representing days since UNIX epoch 1970-01-01. The `date` belongs
+    /// Visit a 32bit integer `date` representing days since UNIX epoch 1970-01-01. The `date` belongs
     /// to the list identified by `sibling_list_id`.
     pub visit_literal_date: VisitLiteralFn,
     /// Visit binary data at the `buffer` with length `len` belonging to the list identified by

diff --git a/ffi/src/handle.rs b/ffi/src/handle.rs
index 27b35bea5..30b695ecc 100644
--- a/ffi/src/handle.rs
+++ b/ffi/src/handle.rs
@@ -2,8 +2,8 @@
 //! boundary.
 //!
 //! Creating a [`Handle`] always implies some kind of ownership transfer. A mutable handle takes
-//! ownership of the object itself (analagous to [`Box`]), while a non-mutable (shared) handle
-//! takes ownership of a shared reference to the object (analagous to [`std::sync::Arc`]). Thus, a created
+//! ownership of the object itself (analogous to [`Box`]), while a non-mutable (shared) handle
+//! takes ownership of a shared reference to the object (analogous to [`std::sync::Arc`]). Thus, a created
 //! handle remains [valid][Handle#Validity], and its underlying object remains accessible, until the
 //! handle is explicitly dropped or consumed. Dropping a mutable handle always drops the underlying
 //! object as well; dropping a shared handle only drops the underlying object if the handle was the

diff --git a/ffi/src/scan.rs b/ffi/src/scan.rs
index d5695c130..86f5e7e5f 100644
--- a/ffi/src/scan.rs
+++ b/ffi/src/scan.rs
@@ -383,7 +383,7 @@ struct ContextWrapper {
 /// data which provides the data handle and selection vector as each element in the iterator.
 ///
 /// # Safety
-/// engine is responsbile for passing a valid [`ExclusiveEngineData`] and selection vector.
+/// engine is responsible for passing a valid [`ExclusiveEngineData`] and selection vector.
 #[no_mangle]
 pub unsafe extern "C" fn visit_scan_data(
     data: Handle,

diff --git a/ffi/src/test_ffi.rs b/ffi/src/test_ffi.rs
index 27c7063fa..14eec1b86 100644
--- a/ffi/src/test_ffi.rs
+++ b/ffi/src/test_ffi.rs
@@ -12,7 +12,7 @@ use delta_kernel::{
 /// output expression can be found in `ffi/tests/test_expression_visitor/expected.txt`.
 ///
 /// # Safety
-/// The caller is responsible for freeing the retured memory, either by calling
+/// The caller is responsible for freeing the returned memory, either by calling
 /// [`free_kernel_predicate`], or [`Handle::drop_handle`]
 #[no_mangle]
 pub unsafe extern "C" fn get_testing_kernel_expression() -> Handle {

diff --git a/integration-tests/src/main.rs b/integration-tests/src/main.rs
index 63adb3940..a5bfe0952 100644
--- a/integration-tests/src/main.rs
+++ b/integration-tests/src/main.rs
@@ -15,8 +15,8 @@ fn create_kernel_schema() -> delta_kernel::schema::Schema {
 fn main() {
     let arrow_schema = create_arrow_schema();
     let kernel_schema = create_kernel_schema();
-    let convereted: delta_kernel::schema::Schema =
+    let converted: delta_kernel::schema::Schema =
         delta_kernel::schema::Schema::try_from(&arrow_schema).expect("couldn't convert");
-    assert!(kernel_schema == convereted);
+    assert!(kernel_schema == converted);
     println!("Okay, made it");
 }

diff --git a/kernel/examples/inspect-table/src/main.rs b/kernel/examples/inspect-table/src/main.rs
index ea25a8404..194530004 100644
--- a/kernel/examples/inspect-table/src/main.rs
+++ b/kernel/examples/inspect-table/src/main.rs
@@ -184,7 +184,7 @@ fn print_scan_file(
 fn try_main() -> DeltaResult<()> {
     let cli = Cli::parse();
 
-    // build a table and get the lastest snapshot from it
+    // build a table and get the latest snapshot from it
     let table = Table::try_from_uri(&cli.path)?;
 
     let engine = DefaultEngine::try_new(

diff --git a/kernel/examples/read-table-multi-threaded/src/main.rs b/kernel/examples/read-table-multi-threaded/src/main.rs
index d97b6c2d3..e689a4ef4 100644
--- a/kernel/examples/read-table-multi-threaded/src/main.rs
+++ b/kernel/examples/read-table-multi-threaded/src/main.rs
@@ -104,7 +104,7 @@ fn truncate_batch(batch: RecordBatch, rows: usize) -> RecordBatch {
     RecordBatch::try_new(batch.schema(), cols).unwrap()
 }
 
-// This is the callback that will be called fo each valid scan row
+// This is the callback that will be called for each valid scan row
 fn send_scan_file(
     scan_tx: &mut spmc::Sender,
     path: &str,
@@ -125,7 +125,7 @@ fn send_scan_file(
 fn try_main() -> DeltaResult<()> {
     let cli = Cli::parse();
 
-    // build a table and get the lastest snapshot from it
+    // build a table and get the latest snapshot from it
     let table = Table::try_from_uri(&cli.path)?;
 
     println!("Reading {}", table.location());
@@ -279,7 +279,7 @@ fn do_work(
 
     // this example uses the parquet_handler from the engine, but an engine could
     // choose to use whatever method it might want to read a parquet file. The reader
-    // could, for example, fill in the parition columns, or apply deletion vectors. Here
+    // could, for example, fill in the partition columns, or apply deletion vectors. Here
     // we assume a more naive parquet reader and fix the data up after the fact.
     // further parallelism would also be possible here as we could read the parquet file
     // in chunks where each thread reads one chunk. The engine would need to ensure

diff --git a/kernel/examples/read-table-single-threaded/src/main.rs b/kernel/examples/read-table-single-threaded/src/main.rs
index 32ad3173d..9bbc9476d 100644
--- a/kernel/examples/read-table-single-threaded/src/main.rs
+++ b/kernel/examples/read-table-single-threaded/src/main.rs
@@ -69,7 +69,7 @@ fn main() -> ExitCode {
 fn try_main() -> DeltaResult<()> {
     let cli = Cli::parse();
 
-    // build a table and get the lastest snapshot from it
+    // build a table and get the latest snapshot from it
     let table = Table::try_from_uri(&cli.path)?;
 
     println!("Reading {}", table.location());

diff --git a/kernel/src/actions/visitors.rs b/kernel/src/actions/visitors.rs
index 7d4be1a82..0cd12ce50 100644
--- a/kernel/src/actions/visitors.rs
+++ b/kernel/src/actions/visitors.rs
@@ -367,7 +367,7 @@ impl RowVisitor for CdcVisitor {
 
 pub type SetTransactionMap = HashMap;
 
-/// Extact application transaction actions from the log into a map
+/// Extract application transaction actions from the log into a map
 ///
 /// This visitor maintains the first entry for each application id it
 /// encounters. When a specific application id is required then

diff --git a/kernel/src/engine/arrow_utils.rs b/kernel/src/engine/arrow_utils.rs
index 4700b72c0..a3e184574 100644
--- a/kernel/src/engine/arrow_utils.rs
+++ b/kernel/src/engine/arrow_utils.rs
@@ -55,9 +55,9 @@ macro_rules! prim_array_cmp {
 
 pub(crate) use prim_array_cmp;
 
-/// Get the indicies in `parquet_schema` of the specified columns in `requested_schema`. This
-/// returns a tuples of (mask_indicies: Vec, reorder_indicies:
-/// Vec). `mask_indicies` is used for generating the mask for reading from the
+/// Get the indices in `parquet_schema` of the specified columns in `requested_schema`. This
+/// returns a tuples of (mask_indices: Vec, reorder_indices:
+/// Vec). `mask_indices` is used for generating the mask for reading from the
 pub(crate) fn make_arrow_error(s: impl Into) -> Error {
     Error::Arrow(arrow_schema::ArrowError::InvalidArgumentError(s.into())).with_backtrace()
 }

diff --git a/kernel/src/engine/default/json.rs b/kernel/src/engine/default/json.rs
index 1912a7b34..ab296e12a 100644
--- a/kernel/src/engine/default/json.rs
+++ b/kernel/src/engine/default/json.rs
@@ -29,7 +29,7 @@ pub struct DefaultJsonHandler {
     store: Arc,
     /// The executor to run async tasks on
     task_executor: Arc,
-    /// The maximun number of batches to read ahead
+    /// The maximum number of batches to read ahead
     readahead: usize,
     /// The number of rows to read per batch
     batch_size: usize,

diff --git a/kernel/src/engine/default/parquet.rs b/kernel/src/engine/default/parquet.rs
index 1acc4ef4a..a65d329a2 100644
--- a/kernel/src/engine/default/parquet.rs
+++ b/kernel/src/engine/default/parquet.rs
@@ -258,7 +258,7 @@ impl FileOpener for ParquetOpener {
             let mut reader = ParquetObjectReader::new(store, meta);
             let metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default()).await?;
             let parquet_schema = metadata.schema();
-            let (indicies, requested_ordering) =
+            let (indices, requested_ordering) =
                 get_requested_indices(&table_schema, parquet_schema)?;
             let options = ArrowReaderOptions::new(); //.with_page_index(enable_page_index);
             let mut builder =
@@ -267,7 +267,7 @@ impl FileOpener for ParquetOpener {
                 &table_schema,
                 parquet_schema,
                 builder.parquet_schema(),
-                &indicies,
+                &indices,
             ) {
                 builder = builder.with_projection(mask)
             }
@@ -330,7 +330,7 @@ impl FileOpener for PresignedUrlOpener {
             let reader = client.get(file_meta.location).send().await?.bytes().await?;
             let metadata = ArrowReaderMetadata::load(&reader, Default::default())?;
             let parquet_schema = metadata.schema();
-            let (indicies, requested_ordering) =
+            let (indices, requested_ordering) =
                 get_requested_indices(&table_schema, parquet_schema)?;
 
             let options = ArrowReaderOptions::new();
@@ -340,7 +340,7 @@ impl FileOpener for PresignedUrlOpener {
                 &table_schema,
                 parquet_schema,
                 builder.parquet_schema(),
-                &indicies,
+                &indices,
             ) {
                 builder = builder.with_projection(mask)
             }

diff --git a/kernel/src/engine/sync/parquet.rs b/kernel/src/engine/sync/parquet.rs
index 2a54e2e86..260ef321b 100644
--- a/kernel/src/engine/sync/parquet.rs
+++ b/kernel/src/engine/sync/parquet.rs
@@ -21,9 +21,8 @@ fn try_create_from_parquet(
     let metadata = ArrowReaderMetadata::load(&file, Default::default())?;
     let parquet_schema = metadata.schema();
     let mut builder = ParquetRecordBatchReaderBuilder::try_new(file)?;
-    let (indicies, requested_ordering) = get_requested_indices(&schema, parquet_schema)?;
-    if let Some(mask) = generate_mask(&schema, parquet_schema, builder.parquet_schema(), &indicies)
-    {
+    let (indices, requested_ordering) = get_requested_indices(&schema, parquet_schema)?;
+    if let Some(mask) = generate_mask(&schema, parquet_schema, builder.parquet_schema(), &indices) {
         builder = builder.with_projection(mask);
     }
     if let Some(predicate) = predicate {

diff --git a/kernel/src/engine_data.rs b/kernel/src/engine_data.rs
index e421d0ad6..25a7e84bd 100644
--- a/kernel/src/engine_data.rs
+++ b/kernel/src/engine_data.rs
@@ -199,7 +199,7 @@ pub trait RowVisitor {
     /// "getter" of type [`GetData`] will be present. This can be used to actually get at the data
     /// for each row. You can `use` the `TypedGetData` trait if you want to have a way to extract
     /// typed data that will fail if the "getter" is for an unexpected type. The data in `getters`
-    /// does not outlive the call to this funtion (i.e. it should be copied if needed).
+    /// does not outlive the call to this function (i.e. it should be copied if needed).
     fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> DeltaResult<()>;
 
     /// Visit the rows of an [`EngineData`], selecting the leaf column names given by

diff --git a/kernel/src/error.rs b/kernel/src/error.rs
index e3230aeb9..815ef3e51 100644
--- a/kernel/src/error.rs
+++ b/kernel/src/error.rs
@@ -1,4 +1,4 @@
-//! Defintions of errors that the delta kernel can encounter
+//! Definitions of errors that the delta kernel can encounter
 
 use std::{
     backtrace::{Backtrace, BacktraceStatus},
@@ -58,7 +58,7 @@ pub enum Error {
     #[error("Internal error {0}. This is a kernel bug, please report.")]
     InternalError(String),
 
-    /// An error enountered while working with parquet data
+    /// An error encountered while working with parquet data
     #[cfg(feature = "parquet")]
     #[error("Arrow error: {0}")]
     Parquet(#[from] parquet::errors::ParquetError),
@@ -99,7 +99,7 @@ pub enum Error {
     #[error("No table version found.")]
     MissingVersion,
 
-    /// An error occured while working with deletion vectors
+    /// An error occurred while working with deletion vectors
     #[error("Deletion Vector error: {0}")]
     DeletionVector(String),

diff --git a/kernel/src/expressions/mod.rs b/kernel/src/expressions/mod.rs
index bad20aea4..620142679 100644
--- a/kernel/src/expressions/mod.rs
+++ b/kernel/src/expressions/mod.rs
@@ -737,7 +737,7 @@ mod tests {
         ),
     ]);
 
-    // Similer to ExpressionDepthChecker::check, but also returns call count
+    // Similar to ExpressionDepthChecker::check, but also returns call count
     let check_with_call_count =
         |depth_limit| ExpressionDepthChecker::check_with_call_count(&expr, depth_limit);

diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs
index 5283c08c5..2ce2fd41a 100644
--- a/kernel/src/expressions/scalars.rs
+++ b/kernel/src/expressions/scalars.rs
@@ -393,7 +393,7 @@ impl PrimitiveType {
         // Timestamps may additionally be encoded as a ISO 8601 formatted string such as
         // `1970-01-01T00:00:00.123456Z`.
         //
-        // The difference arrises mostly in how they are to be handled on the engine side - i.e. timestampNTZ
+        // The difference arises mostly in how they are to be handled on the engine side - i.e. timestampNTZ
         // is not adjusted to UTC, this is just so we can (de-)serialize it as a date sting.
         // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#partition-value-serialization
         TimestampNtz | Timestamp => {

diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs
index f27907bcd..fa88e7afa 100644
--- a/kernel/src/lib.rs
+++ b/kernel/src/lib.rs
@@ -202,7 +202,7 @@ impl FileMeta {
 /// let b: Arc = a.downcast().unwrap();
 /// ```
 ///
-/// In contrast, very similer code that relies only on `Any` would fail to compile:
+/// In contrast, very similar code that relies only on `Any` would fail to compile:
 ///
 /// ```fail_compile
 /// # use std::any::Any;
@@ -404,7 +404,7 @@ pub trait JsonHandler: AsAny {
     ///
     /// - `path` - URL specifying the location to write the JSON file
     /// - `data` - Iterator of EngineData to write to the JSON file. Each row should be written as
-    ///   a new JSON object appended to the file. (that is, the file is newline-delimeted JSON, and
+    ///   a new JSON object appended to the file. (that is, the file is newline-delimited JSON, and
    ///   each row is a JSON object on a single line)
     /// - `overwrite` - If true, overwrite the file if it exists. If false, the call must fail if
     ///   the file exists.

diff --git a/kernel/src/predicates/parquet_stats_skipping/tests.rs b/kernel/src/predicates/parquet_stats_skipping/tests.rs
index b1de88e6b..50833a166 100644
--- a/kernel/src/predicates/parquet_stats_skipping/tests.rs
+++ b/kernel/src/predicates/parquet_stats_skipping/tests.rs
@@ -299,7 +299,7 @@ fn test_sql_where() {
         "WHERE {TRUE} < {FALSE}"
     );
 
-    // Constrast normal vs SQL WHERE semantics - comparison
+    // Contrast normal vs SQL WHERE semantics - comparison
     expect_eq!(
         AllNullTestFilter.eval_expr(&Expr::lt(col.clone(), VAL), false),
         None,
@@ -321,7 +321,7 @@ fn test_sql_where() {
         "WHERE {VAL} < {col}"
     );
 
-    // Constrast normal vs SQL WHERE semantics - comparison inside AND
+    // Contrast normal vs SQL WHERE semantics - comparison inside AND
     expect_eq!(
         AllNullTestFilter.eval_expr(&Expr::and(NULL, Expr::lt(col.clone(), VAL)), false),
         None,

diff --git a/kernel/src/predicates/tests.rs b/kernel/src/predicates/tests.rs
index ce273e7b8..fa4aec191 100644
--- a/kernel/src/predicates/tests.rs
+++ b/kernel/src/predicates/tests.rs
@@ -51,7 +51,7 @@ fn test_default_eval_scalar() {
     }
 }
 
-// verifies that partial orderings behave as excpected for all Scalar types
+// verifies that partial orderings behave as expected for all Scalar types
 #[test]
 fn test_default_partial_cmp_scalars() {
     use Ordering::*;

diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs
index 54eb5344c..847855d4a 100644
--- a/kernel/src/scan/data_skipping.rs
+++ b/kernel/src/scan/data_skipping.rs
@@ -24,7 +24,7 @@ mod tests;
 /// Returns `None` if the predicate is not eligible for data skipping.
 ///
 /// We normalize each binary operation to a comparison between a column and a literal value and
-/// rewite that in terms of the min/max values of the column.
+/// rewrite that in terms of the min/max values of the column.
 /// For example, `1 < a` is rewritten as `minValues.a > 1`.
 ///
 /// For Unary `Not`, we push the Not down using De Morgan's Laws to invert everything below the Not.

diff --git a/kernel/src/schema.rs b/kernel/src/schema.rs
index 42901751f..9cb6769f9 100644
--- a/kernel/src/schema.rs
+++ b/kernel/src/schema.rs
@@ -1156,7 +1156,7 @@ mod tests {
         ),
     ]);
 
-    // Similer to SchemaDepthChecker::check, but also returns call count
+    // Similar to SchemaDepthChecker::check, but also returns call count
     let check_with_call_count =
         |depth_limit| SchemaDepthChecker::check_with_call_count(&schema, depth_limit);

diff --git a/kernel/src/transaction.rs b/kernel/src/transaction.rs
index c6e93ea7b..c73782f64 100644
--- a/kernel/src/transaction.rs
+++ b/kernel/src/transaction.rs
@@ -241,7 +241,7 @@ impl WriteContext {
 /// Result after committing a transaction. If 'committed', the version is the new version written
 /// to the log. If 'conflict', the transaction is returned so the caller can resolve the conflict
 /// (along with the version which conflicted).
-// TODO(zach): in order to make the returning of a transcation useful, we need to add APIs to
+// TODO(zach): in order to make the returning of a transaction useful, we need to add APIs to
 // update the transaction to a new version etc.
 #[derive(Debug)]
 pub enum CommitResult {

diff --git a/kernel/tests/golden_tables.rs b/kernel/tests/golden_tables.rs
index cd9023db1..120271ef2 100644
--- a/kernel/tests/golden_tables.rs
+++ b/kernel/tests/golden_tables.rs
@@ -26,7 +26,7 @@ use delta_kernel::engine::default::DefaultEngine;
 mod common;
 use common::{load_test_data, to_arrow};
 
-// NB adapated from DAT: read all parquet files in the directory and concatenate them
+// NB adapted from DAT: read all parquet files in the directory and concatenate them
 async fn read_expected(path: &Path) -> DeltaResult {
     let store = Arc::new(LocalFileSystem::new_with_prefix(path)?);
     let files = store.list(None).try_collect::>().await?;
@@ -368,7 +368,7 @@ golden_test!("deltalog-getChanges", latest_snapshot_test);
 golden_test!("dv-partitioned-with-checkpoint", latest_snapshot_test);
 golden_test!("dv-with-columnmapping", latest_snapshot_test);
-skip_test!("hive": "test not yet implmented - different file structure");
+skip_test!("hive": "test not yet implemented - different file structure");
 golden_test!("kernel-timestamp-int96", latest_snapshot_test);
 golden_test!("kernel-timestamp-pst", latest_snapshot_test);
 golden_test!("kernel-timestamp-timestamp_micros", latest_snapshot_test);
@@ -436,11 +436,11 @@ skip_test!("canonicalized-paths-special-b": "BUG: path canonicalization");
 // // We added two add files with the same path `foo`. The first should have been removed.
 // // The second should remain, and should have a hard-coded modification time of 1700000000000L
 // assert(foundFiles.find(_.getPath.endsWith("foo")).exists(_.getModificationTime == 1700000000000L))
-skip_test!("delete-re-add-same-file-different-transactions": "test not yet implmented");
+skip_test!("delete-re-add-same-file-different-transactions": "test not yet implemented");
 
 // data file doesn't exist, get the relative path to compare
 // assert(new File(addFileStatus.getPath).getName == "special p@#h")
-skip_test!("log-replay-special-characters-b": "test not yet implmented");
+skip_test!("log-replay-special-characters-b": "test not yet implemented");
 
 negative_test!("deltalog-invalid-protocol-version");
 negative_test!("deltalog-state-reconstruction-from-checkpoint-missing-metadata");