Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
205 commits
Select commit Hold shift + click to select a range
435302e
introduce visitors
sebastiantia Mar 12, 2025
e500a10
remove pub
sebastiantia Mar 12, 2025
19733cd
assert! instead of assert_eq with bool
sebastiantia Mar 12, 2025
87c9f31
log replay for checkpoints
sebastiantia Mar 13, 2025
db5ccd0
rename & some clean up
sebastiantia Mar 13, 2025
42c08c1
remove new path for now
sebastiantia Mar 13, 2025
f91baeb
merge non file action visitor tests
sebastiantia Mar 22, 2025
9fdfba7
mvp for refactor
sebastiantia Mar 24, 2025
d420fd1
these github action checks clog my screen
sebastiantia Mar 24, 2025
9e0e048
base file actions struct
sebastiantia Mar 25, 2025
303444b
combine visitors
sebastiantia Mar 25, 2025
5dbc924
fmt
sebastiantia Mar 26, 2025
b793961
remove old code
sebastiantia Mar 26, 2025
508976f
move FileActionKey
sebastiantia Mar 26, 2025
bccaa17
Merge branch 'main' into checkpoint-visitors
sebastiantia Mar 26, 2025
a23d7cb
merge
sebastiantia Mar 26, 2025
0160ef1
fix whitespace
sebastiantia Mar 26, 2025
aae7046
remove old code
sebastiantia Mar 26, 2025
f574370
refactor more
sebastiantia Mar 26, 2025
a618833
refactor
sebastiantia Mar 26, 2025
7da74b2
more docs
sebastiantia Mar 26, 2025
220a216
invert is_log_batch logic
sebastiantia Mar 26, 2025
9d86911
docs
sebastiantia Mar 26, 2025
e5b0e32
docs
sebastiantia Mar 26, 2025
a5393dc
docs and imports
sebastiantia Mar 26, 2025
a23c651
improve mod doc
sebastiantia Mar 27, 2025
d712d18
improve doc
sebastiantia Mar 27, 2025
e564ae1
docs'
sebastiantia Mar 27, 2025
b14ff19
docs
sebastiantia Mar 27, 2025
a52d484
update
sebastiantia Mar 27, 2025
a243a98
nits
sebastiantia Mar 27, 2025
9f06382
Revert "nits"
sebastiantia Mar 28, 2025
58f38c0
nits
sebastiantia Mar 28, 2025
628546c
refactor
sebastiantia Mar 27, 2025
88cf983
move
sebastiantia Mar 27, 2025
10bb7b5
fix rebase
sebastiantia Mar 28, 2025
4b5a3a9
introduce visitors
sebastiantia Mar 12, 2025
1cb9364
assert! instead of assert_eq with bool
sebastiantia Mar 12, 2025
797a05c
merge non file action visitor tests
sebastiantia Mar 22, 2025
45c698d
base file actions struct
sebastiantia Mar 25, 2025
b062125
combine visitors
sebastiantia Mar 25, 2025
90e46cd
fmt
sebastiantia Mar 26, 2025
3c25392
remove old code
sebastiantia Mar 26, 2025
cba8ed6
move FileActionKey
sebastiantia Mar 26, 2025
28f1fb4
fix merge
sebastiantia Mar 27, 2025
48f831a
doc
sebastiantia Mar 27, 2025
7c3d976
docs
sebastiantia Mar 28, 2025
b2bb0ce
fix rebase
sebastiantia Mar 28, 2025
0054c71
merge
sebastiantia Mar 28, 2025
abc7e1f
merge fixes
sebastiantia Mar 28, 2025
964f294
docs
sebastiantia Mar 30, 2025
c026258
clean up and docs
sebastiantia Mar 30, 2025
88ba96c
docs
sebastiantia Mar 30, 2025
4c98c84
docs
sebastiantia Mar 30, 2025
c7cd2d1
Merge branch 'extract-deduplication-logic-from-addRemoveDedupVisitor'…
sebastiantia Mar 30, 2025
542166c
merge
sebastiantia Apr 1, 2025
655ed1d
fix merge
sebastiantia Apr 1, 2025
6c222a3
crate mod
sebastiantia Apr 1, 2025
30bd7d6
dev vis
sebastiantia Apr 1, 2025
159b0dd
merge
sebastiantia Apr 1, 2025
5777e5a
improve docs
sebastiantia Apr 1, 2025
5e6695b
Merge branch 'extract-log-replay-processing-structure' into checkpoin…
sebastiantia Apr 1, 2025
bdbc3fb
docs
sebastiantia Apr 1, 2025
6491113
breaking merge
sebastiantia Apr 1, 2025
95d0164
accept metadata & protocol param
sebastiantia Apr 1, 2025
51104aa
Merge branch 'checkpoint-visitors' into checkpoint-replay
sebastiantia Apr 1, 2025
7a59eab
improve docs
sebastiantia Apr 1, 2025
e4bc34e
docs
sebastiantia Apr 1, 2025
d24a80c
refactor into checkpoint mod
sebastiantia Apr 1, 2025
1981ab4
refactor into test_utils
sebastiantia Apr 1, 2025
f084424
rebase on test-utils refactor
sebastiantia Apr 2, 2025
6a28d99
merge
sebastiantia Apr 2, 2025
3488318
merge
sebastiantia Apr 2, 2025
c4e5522
redundant docs
sebastiantia Apr 2, 2025
18d1a29
fix doc
sebastiantia Apr 2, 2025
7dccdea
wip
sebastiantia Apr 2, 2025
a9d6c81
include table API
sebastiantia Apr 2, 2025
fffd8f7
fix docs
sebastiantia Apr 2, 2025
92b7296
Merge branch 'main' into checkpoint-visitors
sebastiantia Apr 2, 2025
6167cf2
merge
sebastiantia Apr 2, 2025
0d8b3c0
hoist selection vector and data skipping filter
sebastiantia Apr 3, 2025
43760a5
docs
sebastiantia Apr 3, 2025
1137be6
refactorg
sebastiantia Apr 3, 2025
6e3d722
docs
sebastiantia Apr 3, 2025
2252cec
match simplification
sebastiantia Apr 3, 2025
09f3930
docs
sebastiantia Apr 3, 2025
3efeef6
docs and rename
sebastiantia Apr 4, 2025
63f0294
nits and renames
sebastiantia Apr 4, 2025
fab97ba
rename
sebastiantia Apr 4, 2025
f79d9a5
priv mod
sebastiantia Apr 4, 2025
568b59e
docs
sebastiantia Apr 5, 2025
bce9384
clean up docs
sebastiantia Apr 6, 2025
87b17d4
polish docs
sebastiantia Apr 6, 2025
d8df2ea
notes
sebastiantia Apr 6, 2025
7f49ccd
fix indentation
sebastiantia Apr 6, 2025
e809306
merge
sebastiantia Apr 6, 2025
c9f6edd
bool flags
sebastiantia Apr 6, 2025
3f8a69b
Merge branch 'extract-log-replay-processing-structure' into checkpoin…
sebastiantia Apr 6, 2025
e520d1f
remove atomic counters
sebastiantia Apr 6, 2025
f31e51d
box counters
sebastiantia Apr 6, 2025
79d6ff8
review
sebastiantia Apr 7, 2025
a3cf0f2
revert
sebastiantia Apr 7, 2025
326bea6
move logic to CheckpointWriter
sebastiantia Apr 7, 2025
4416968
rc<refcell>
sebastiantia Apr 7, 2025
7f7ac4e
merge
sebastiantia Apr 7, 2025
4ceaa50
update
sebastiantia Apr 7, 2025
20fe7fe
unignore
sebastiantia Apr 7, 2025
29489d7
fix docs
sebastiantia Apr 7, 2025
ae22a6b
merge
sebastiantia Apr 7, 2025
5ccde93
oops
sebastiantia Apr 7, 2025
00c834b
docs
sebastiantia Apr 7, 2025
3c11320
clean up doc & test
sebastiantia Apr 7, 2025
4f61757
clean up docs
sebastiantia Apr 7, 2025
fdd4f68
update docs
sebastiantia Apr 7, 2025
f3257b4
Merge branch 'main' into checkpoint-visitors
sebastiantia Apr 7, 2025
9c992fc
merge
sebastiantia Apr 7, 2025
09e8199
merge
sebastiantia Apr 7, 2025
72bb446
merge fix
sebastiantia Apr 7, 2025
e2ceee3
docs
sebastiantia Apr 7, 2025
1a5fcb4
remove checkpoint builder
sebastiantia Apr 8, 2025
f177492
docs
sebastiantia Apr 8, 2025
04d418e
docs
sebastiantia Apr 8, 2025
d3a97a7
priv
sebastiantia Apr 8, 2025
1adb100
tests and docs
sebastiantia Apr 8, 2025
9834c6d
fix builds
sebastiantia Apr 8, 2025
2aec9c3
remove mod docs in this PR
sebastiantia Apr 8, 2025
2e2062f
update docs
sebastiantia Apr 8, 2025
aed3ab6
remove file
sebastiantia Apr 8, 2025
c92ea56
Merge branch 'checkpoint-visitors' into checkpoint-replay
sebastiantia Apr 8, 2025
fcb289d
docs
sebastiantia Apr 8, 2025
4d2029e
docs
sebastiantia Apr 9, 2025
e0d81ab
docs
sebastiantia Apr 9, 2025
b4e28ee
review
sebastiantia Apr 9, 2025
544c42a
partial review
sebastiantia Apr 9, 2025
e8d1239
arc atomic
sebastiantia Apr 9, 2025
e9de5bc
arc
sebastiantia Apr 9, 2025
99d31a7
.finalize() with tests
sebastiantia Apr 10, 2025
0b609d5
merge
sebastiantia Apr 10, 2025
ab0a373
docs
sebastiantia Apr 10, 2025
9a9697a
test coverage
sebastiantia Apr 10, 2025
c7630a3
doc
sebastiantia Apr 10, 2025
011ec3f
merge
sebastiantia Apr 11, 2025
c58074b
fix merge
sebastiantia Apr 11, 2025
78fab5f
docs
sebastiantia Apr 11, 2025
64c720d
build & doc fixes
sebastiantia Apr 11, 2025
7c90c33
fmt
sebastiantia Apr 11, 2025
48a0153
review
sebastiantia Apr 11, 2025
4a1a1dd
schema spec
sebastiantia Apr 11, 2025
4d48a8a
pub crate
sebastiantia Apr 11, 2025
411b2c4
forgot to include this file
sebastiantia Apr 11, 2025
48d529d
review
sebastiantia Apr 14, 2025
5f55803
merge & reviews
sebastiantia Apr 14, 2025
b894936
merge
sebastiantia Apr 14, 2025
1bd9658
vis
sebastiantia Apr 14, 2025
2439ef3
compiling doc
sebastiantia Apr 15, 2025
1695392
docs
sebastiantia Apr 15, 2025
cf3faf8
CheckpointWriter error
sebastiantia Apr 15, 2025
aa01189
relaxed ordering
sebastiantia Apr 15, 2025
a9f9614
docs & last_checkpoint
sebastiantia Apr 15, 2025
14b7db7
schemas
sebastiantia Apr 15, 2025
8f985ca
test
sebastiantia Apr 15, 2025
17f1fd9
merge
sebastiantia Apr 15, 2025
9a76b22
fix flag
sebastiantia Apr 15, 2025
ffb02db
write
sebastiantia Apr 15, 2025
31da7d4
extract .finalize api to separate PR
sebastiantia Apr 15, 2025
3621121
remove comments
sebastiantia Apr 15, 2025
600cee6
doc
sebastiantia Apr 15, 2025
2371ed0
include issue
sebastiantia Apr 16, 2025
a04ca66
Merge branch 'main' into checkpoint-builder-and-writer
sebastiantia Apr 16, 2025
db38653
merge fix
sebastiantia Apr 16, 2025
d191912
reviews
sebastiantia Apr 16, 2025
2ece8a3
err handling
sebastiantia Apr 16, 2025
3d2532d
const
sebastiantia Apr 16, 2025
2d5890a
docs
sebastiantia Apr 16, 2025
a69f3f0
issue track
sebastiantia Apr 16, 2025
e84a55b
docs
sebastiantia Apr 17, 2025
457a95c
review
sebastiantia Apr 17, 2025
87aa6a9
merge
sebastiantia Apr 17, 2025
4ccf380
docs
sebastiantia Apr 17, 2025
ca6dbe3
checkpont metadata
sebastiantia Apr 18, 2025
ecaa9b0
channel instead of arc<atomic> for action counts
sebastiantia Apr 18, 2025
f3679e0
docs & fix
sebastiantia Apr 18, 2025
a6c3bf4
review
sebastiantia Apr 19, 2025
6d9c5cb
remove test file & spelling
sebastiantia Apr 21, 2025
affe0b5
reviews
sebastiantia Apr 21, 2025
37e7bea
review - remove channels, iterator as param, FileMeta instead of Engi…
sebastiantia Apr 21, 2025
ff92454
docs
sebastiantia Apr 21, 2025
83827e6
docs
sebastiantia Apr 21, 2025
87b5f1e
fix docs
sebastiantia Apr 21, 2025
b2b07ac
snapshot::checkpoint
sebastiantia Apr 22, 2025
ad8380e
docs
sebastiantia Apr 22, 2025
dbff206
doc test
sebastiantia Apr 22, 2025
7e403f1
doc
sebastiantia Apr 22, 2025
85cb57c
split checkpoint path
sebastiantia Apr 22, 2025
32e6714
reviews
sebastiantia Apr 22, 2025
de3252d
review
sebastiantia Apr 22, 2025
fe11091
review
sebastiantia Apr 22, 2025
ae86b11
docs
sebastiantia Apr 22, 2025
094f460
docs
sebastiantia Apr 22, 2025
893568a
note about not supporting uuid named checkpoints
sebastiantia Apr 22, 2025
d0b6e9b
impl Into
sebastiantia Apr 22, 2025
9e225bd
reviews
sebastiantia Apr 23, 2025
20bd8e6
Merge branch 'main' into checkpoint-builder-and-writer
sebastiantia Apr 28, 2025
be6ba68
fix merge
sebastiantia Apr 28, 2025
3efd39d
Merge branch 'main' into checkpoint-builder-and-writer
sebastiantia Apr 29, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 45 additions & 61 deletions kernel/src/checkpoint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@
//! ## Architecture
//!
//! - [`CheckpointWriter`] - Core component that manages the checkpoint creation workflow
//! - [`CheckpointData`] - Wraps the [`CheckpointDataIterator`] and destination path information
//! - [`CheckpointDataIterator`] - Iterator over the checkpoint data to be written
//!
//! ## Usage
//!
//! The following steps outline the process of creating a checkpoint:
//!
//! 1. Create a [`CheckpointWriter`] using [`Snapshot::checkpoint`]
//! 2. Get [`CheckpointData`] from [`CheckpointWriter::checkpoint_data`]
//! 3. Write the [`CheckpointData::data`] to [`CheckpointData::path`]
//! 2. Get the checkpoint path from [`CheckpointWriter::checkpoint_path`]
//! 2. Get the checkpoint data from [`CheckpointWriter::checkpoint_data`]
//! 3. Write the data to the path in object storage (engine-specific)
//! 4. Collect metadata ([`FileMeta`]) from the write operation
//! 5. Pass the metadata and [`CheckpointDataIterator`] to `CheckpointWriter::finalize` to finalize the checkpoint
//! 5. Pass the metadata and consumed data iterator to `CheckpointWriter::finalize`
//!
//! ```no_run
//! # use std::sync::Arc;
//! # use delta_kernel::checkpoint::CheckpointData;
//! # use delta_kernel::checkpoint::CheckpointDataIterator;
//! # use delta_kernel::checkpoint::CheckpointWriter;
//! # use delta_kernel::Engine;
//! # use delta_kernel::table::Table;
Expand All @@ -40,12 +40,11 @@
//! # use delta_kernel::FileMeta;
//! # use delta_kernel::snapshot::Snapshot;
//! # use url::Url;
//! fn write_checkpoint_file(checkpoint_data: &CheckpointData) -> DeltaResult<FileMeta> {
//! todo!() /* engine-specific logic to write checkpoint_data.data to checkpoint_data.path */
//! fn write_checkpoint_file(path: Url, data: &CheckpointDataIterator) -> DeltaResult<FileMeta> {
//! todo!() /* engine-specific logic to write data to object storage*/
//! }
//!
//! // Create an engine instance
//! let engine: &dyn Engine = todo!();
//! let engine: &dyn Engine = todo!(); /* create engine instance */
//!
//! // Create a table instance for the table you want to checkpoint
//! let table = Table::try_from_uri("./tests/data/app-txn-no-checkpoint")?;
Expand All @@ -56,16 +55,17 @@
//! // Create a checkpoint writer from the snapshot
//! let mut writer: CheckpointWriter = snapshot.checkpoint()?;
//!
//! // Get the checkpoint data and path
//! // Get the checkpoint path and data
//! let checkpoint_path = writer.checkpoint_path()?;
//! let checkpoint_data = writer.checkpoint_data(engine)?;
//!
//! // Write the checkpoint data to the object store and collect metadata
//! let metadata: FileMeta = write_checkpoint_file(&checkpoint_data)?;
//! let metadata: FileMeta = write_checkpoint_file(checkpoint_path, &checkpoint_data)?;
//!
//! /* IMPORTANT: All data must be written before finalizing the checkpoint */
//!
//! // TODO(#850): Implement the finalize method
//! // writer.finalize(&engine, &metadata, checkpoint_data.data)?;
//! // writer.finalize(&engine, &metadata, checkpoint_data)?;
//!
//! # Ok::<_, Error>(())
//! ```
Expand Down Expand Up @@ -138,11 +138,12 @@ static CHECKPOINT_METADATA_ACTION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(||
/// tracks action statistics required for finalizing the checkpoint.
///
/// # Warning
/// Additionally, all yielded data must be written to the specified path before calling
/// `CheckpointWriter::finalize`, or it may result in data loss and corruption.
/// The [`CheckpointDataIterator`] must be fully consumed to ensure proper collection of statistics for
/// the checkpoint. Additionally, all yielded data must be written to the specified path before calling
/// `CheckpointWriter::finalize`. Failing to do so may result in data loss or corruption.
pub struct CheckpointDataIterator {
/// The inner iterator that yields the checkpoint data with counts
inner: Box<dyn Iterator<Item = DeltaResult<CheckpointBatch>>>,
/// The nested iterator that yields checkpoint batches with action counts
checkpoint_batch_iterator: Box<dyn Iterator<Item = DeltaResult<CheckpointBatch>>>,
/// Running total of actions included in the checkpoint
actions_count: i64,
/// Running total of add actions included in the checkpoint
Expand All @@ -154,12 +155,12 @@ impl Iterator for CheckpointDataIterator {

/// Advances the iterator and returns the next value.
///
/// This implementation transforms the `CheckpointBatch` items from the inner iterator into
/// [`FilteredEngineData`] items for the engine to write, while accumulating action counts for
/// This implementation transforms the `CheckpointBatch` items from the nested iterator into
/// [`FilteredEngineData`] items for the engine to write, while accumulating action counts from
/// each batch. The [`CheckpointDataIterator`] is passed back to the kernel on call to
/// `CheckpointWriter::finalize` for counts to be read and written to the `_last_checkpoint` file
fn next(&mut self) -> Option<Self::Item> {
let next_item = self.inner.next();
let next_item = self.checkpoint_batch_iterator.next();

next_item.map(|result| {
result.map(|batch| {
Expand All @@ -171,23 +172,6 @@ impl Iterator for CheckpointDataIterator {
}
}

/// Represents the data needed to create a single-file checkpoint.
///
/// Obtained from [`CheckpointWriter::checkpoint_data`], this struct provides both the
/// location where the checkpoint file should be written and an iterator over the data
/// that should be included in the checkpoint.
///
/// # Warning
/// The [`CheckpointDataIterator`] must be fully consumed to ensure proper collection of statistics for
/// the checkpoint. Additionally, all yielded data must be written to the specified path before calling
/// `CheckpointWriter::finalize`. Failing to do so may result in data loss or corruption.
pub struct CheckpointData {
/// The URL where the checkpoint file should be written.
pub path: Url,
/// An iterator over the checkpoint data to be written to the file.
pub data: CheckpointDataIterator,
}

/// Orchestrates the process of creating a checkpoint for a table.
///
/// The [`CheckpointWriter`] is the entry point for generating checkpoint data for a Delta table.
Expand All @@ -200,8 +184,6 @@ pub struct CheckpointData {
///
/// # See Also
/// See the [module-level documentation](self) for the complete checkpoint workflow
///
/// [`Table::checkpoint`]: [`crate::table::Table::checkpoint`]
pub struct CheckpointWriter {
/// Reference to the snapshot (i.e. version) of the table being checkpointed
pub(crate) snapshot: Arc<Snapshot>,
Expand All @@ -213,16 +195,28 @@ impl CheckpointWriter {
Self { snapshot }
}

/// Retrieves the checkpoint data and path information.
/// Returns the URL where the checkpoint file should be written.
///
/// This method generates the filtered actions for the checkpoint and determines
/// the appropriate destination path.
/// This method generates the checkpoint path based on the table's root and the current version.
/// The generated path follows the classic naming convention for checkpoints:
/// - `n.checkpoint.parquet`, where `n` is the current version of the table.
pub fn checkpoint_path(&self) -> DeltaResult<Url> {
ParsedLogPath::new_classic_parquet_checkpoint(
self.snapshot.table_root(),
self.snapshot.version(),
)
.map(|parsed| parsed.location)
}

/// Returns the checkpoint data to be written to the checkpoint file.
///
/// This method reads the actions from the log segment and processes them
/// to create the checkpoint data.
///
/// # Returns
/// [`CheckpointData`] containing the checkpoint path and data to write.
/// # Parameters
/// - `engine`: Implementation of [`Engine`] APIs.
///
/// # Warning
/// All data must be written to persistent storage before calling `CheckpointWriter::finalize()`.
/// # Returns: [`CheckpointDataIterator`] containing the checkpoint data
// This method is the core of the checkpoint generation process. It:
// 1. Determines whether to write a V1 or V2 checkpoint based on the table's
// `v2Checkpoints` feature support
Expand All @@ -231,7 +225,7 @@ impl CheckpointWriter {
// 4. Chains the checkpoint metadata action if writing a V2 spec checkpoint
// (i.e., if `v2Checkpoints` feature is supported by table)
// 5. Generates the appropriate checkpoint path
pub fn checkpoint_data(&mut self, engine: &dyn Engine) -> DeltaResult<CheckpointData> {
pub fn checkpoint_data(&mut self, engine: &dyn Engine) -> DeltaResult<CheckpointDataIterator> {
let is_v2_checkpoints_supported = self
.snapshot
.table_configuration()
Expand Down Expand Up @@ -263,21 +257,11 @@ impl CheckpointWriter {
.then(|| self.create_checkpoint_metadata_batch(version, engine)),
);

let checkpoint_path = ParsedLogPath::new_classic_parquet_checkpoint(
self.snapshot.table_root(),
self.snapshot.version(),
)?;

// Wrap the data iterator to send counts to the CheckpointWriter when dropped
let wrapped_iterator = CheckpointDataIterator {
inner: Box::new(chained),
// Wrap the iterator in a CheckpointDataIterator to track action counts
Ok(CheckpointDataIterator {
checkpoint_batch_iterator: Box::new(chained),
actions_count: 0,
add_actions_count: 0,
};

Ok(CheckpointData {
path: checkpoint_path.location,
data: wrapped_iterator,
})
}

Expand All @@ -292,15 +276,15 @@ impl CheckpointWriter {
/// # Parameters
/// - `engine`: Implementation of [`Engine`] apis.
/// - `metadata`: The metadata of the written checkpoint file
/// - `checkpoint_data_iter`: The exhausted checkpoint data iterator (must be fully consumed)
/// - `checkpoint_data`: The exhausted checkpoint data iterator (must be fully consumed)
///
/// # Returns: [`variant@Ok`] if the checkpoint was successfully finalized
#[allow(unused)]
fn finalize(
self,
_engine: &dyn Engine,
_metadata: &FileMeta,
_checkpoint_data_iter: CheckpointDataIterator,
_checkpoint_data: CheckpointDataIterator,
) -> DeltaResult<()> {
// Verify the iterator is exhausted (optional)
// Implementation will use checkpoint_data.actions_count and checkpoint_data.add_actions_count
Expand Down
21 changes: 9 additions & 12 deletions kernel/src/checkpoint/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,25 +221,24 @@ fn test_v1_checkpoint_latest_version_by_default() -> DeltaResult<()> {
let table = Table::new(table_root);
let snapshot = table.snapshot(&engine, None)?;
let mut writer = snapshot.checkpoint()?;
let checkpoint_data = writer.checkpoint_data(&engine)?;
let mut data_iter = checkpoint_data.data;

// Verify the checkpoint file path is the latest version by default.
assert_eq!(
checkpoint_data.path,
writer.checkpoint_path()?,
Url::parse("memory:///_delta_log/00000000000000000002.checkpoint.parquet")?
);

let mut data_iter = writer.checkpoint_data(&engine)?;
// The first batch should be the metadata and protocol actions.
let batch = data_iter.next().unwrap()?;
assert_eq!(batch.selection_vector, [true, true]);

// The second batch should only include the add action as the remove action is expired.
// The second batch should include both the add action and the remove action
let batch = data_iter.next().unwrap()?;
assert_eq!(batch.selection_vector, [true, true]);

// The third batch should not be included as the selection vector does not
// contain any true values, as the add action is removed in a following commit.
// contain any true values, as the file added is removed in a following commit.
assert!(data_iter.next().is_none());

assert_eq!(data_iter.actions_count, 4);
Expand Down Expand Up @@ -280,15 +279,14 @@ fn test_v1_checkpoint_specific_version() -> DeltaResult<()> {
// Specify version 0 for checkpoint
let snapshot = table.snapshot(&engine, Some(0))?;
let mut writer = snapshot.checkpoint()?;
let checkpoint_data = writer.checkpoint_data(&engine)?;
let mut data_iter = checkpoint_data.data;

// Verify the checkpoint file path is the specified version.
assert_eq!(
checkpoint_data.path,
writer.checkpoint_path()?,
Url::parse("memory:///_delta_log/00000000000000000000.checkpoint.parquet")?
);

let mut data_iter = writer.checkpoint_data(&engine)?;
// The first batch should be the metadata and protocol actions.
let batch = data_iter.next().unwrap()?;
assert_eq!(batch.selection_vector, [true, true]);
Expand Down Expand Up @@ -336,20 +334,19 @@ fn test_v2_checkpoint_supported_table() -> DeltaResult<()> {
let table = Table::new(table_root);
let snapshot = table.snapshot(&engine, None)?;
let mut writer = snapshot.checkpoint()?;
let checkpoint_data = writer.checkpoint_data(&engine)?;
let mut data_iter = checkpoint_data.data;

// Verify the checkpoint file path is the latest version by default.
assert_eq!(
checkpoint_data.path,
writer.checkpoint_path()?,
Url::parse("memory:///_delta_log/00000000000000000001.checkpoint.parquet")?
);

let mut data_iter = writer.checkpoint_data(&engine)?;
// The first batch should be the metadata and protocol actions.
let batch = data_iter.next().unwrap()?;
assert_eq!(batch.selection_vector, [true, true]);

// The second batch should be the add action as the remove action is expired.
// The second batch should include both the add action and the remove action
let batch = data_iter.next().unwrap()?;
assert_eq!(batch.selection_vector, [true, true]);

Expand Down
Loading