Skip to content

Commit

Permalink
add test, fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick Lanham committed Sep 25, 2024
1 parent d1cd705 commit 3fecf88
Showing 1 changed file with 70 additions and 0 deletions.
70 changes: 70 additions & 0 deletions kernel/src/snapshot.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,10 @@ fn list_log_files_with_checkpoint(

if max_checkpoint_version != checkpoint_metadata.version {
warn!("_last_checkpoint hint is out of date. _last_checkpoint version: {}. Using actual most recent: {}", checkpoint_metadata.version, max_checkpoint_version);
// we may need to drop some commits that are at or before the actual last checkpoint
commit_files.retain(|commit_meta| {
version_from_location(&commit_meta.location).unwrap_or(0) > max_checkpoint_version
});
} else if checkpoint_files.len() != checkpoint_metadata.parts.unwrap_or(1) as usize {
return Err(Error::Generic(format!(
"_last_checkpoint indicated that checkpoint should have {} parts, but it has {}",
Expand Down Expand Up @@ -502,6 +506,72 @@ mod tests {
assert!(cp.is_none())
}

#[test]
/// Checks that listing log files copes with a stale `_last_checkpoint` hint.
///
/// The in-memory store holds commits at versions 0, 2, 4, 6 and 7 and
/// single-part checkpoints at versions 1, 3 and 5, while the hint passed in
/// claims the last checkpoint is version 3. The listing is expected to return
/// the checkpoint at version 5 and only the commits after it, in the order
/// 7 then 6.
fn test_read_log_with_out_of_date_last_checkpoint() {
    // in memory file system
    let store = Arc::new(InMemory::new());

    /// Builds `_delta_log/<index zero-padded to 20 digits>.<suffix>`.
    fn get_path(index: usize, suffix: &str) -> Path {
        let path = format!("_delta_log/{index:020}.{suffix}");
        Path::from(path.as_str())
    }
    // The file contents are never parsed here; only the file names matter.
    let data = bytes::Bytes::from("kernel-data");

    // put commit and checkpoint files
    tokio::runtime::Runtime::new()
        .expect("create tokio runtime")
        .block_on(async {
            for path in [
                get_path(0, "json"),
                get_path(1, "checkpoint.parquet"),
                get_path(2, "json"),
                get_path(3, "checkpoint.parquet"),
                get_path(4, "json"),
                get_path(5, "checkpoint.parquet"),
                get_path(6, "json"),
                get_path(7, "json"),
            ] {
                store
                    .put(&path, data.clone().into())
                    .await
                    .expect("put log file");
            }
        });

    let client = ObjectStoreFileSystemClient::new(
        store,
        Path::from("/"),
        Arc::new(TokioBackgroundExecutor::new()),
    );

    // Hint that lags behind: it names version 3 although a checkpoint exists at 5.
    let checkpoint_metadata = CheckpointMetadata {
        version: 3,
        size: 10,
        parts: None,
        size_in_bytes: None,
        num_of_add_files: None,
        checkpoint_schema: None,
        checksum: None,
    };
    let url = Url::parse("memory:///_delta_log/").expect("valid url");
    let (commit_files, checkpoint_files) =
        list_log_files_with_checkpoint(&checkpoint_metadata, &client, &url).unwrap();
    assert_eq!(checkpoint_files.len(), 1);
    assert_eq!(commit_files.len(), 2);
    // The actual most recent checkpoint wins over the hinted one.
    assert_eq!(
        version_from_location(&checkpoint_files[0].location).unwrap_or(0),
        5
    );
    // Commit 4 sits between the hinted and the actual checkpoint and must be gone;
    // the remaining commits come back newest first.
    assert_eq!(
        version_from_location(&commit_files[0].location).unwrap_or(0),
        7
    );
    assert_eq!(
        version_from_location(&commit_files[1].location).unwrap_or(0),
        6
    );
}

/// Returns the bytes of a small JSON document carrying `size`,
/// `size_in_bytes` and `version` fields, used as `_last_checkpoint`
/// content by the tests.
fn valid_last_checkpoint() -> Vec<u8> {
    let contents: &[u8] = br#"{"size":8,"size_in_bytes":21857,"version":1}"#;
    contents.to_vec()
}
Expand Down

0 comments on commit 3fecf88

Please sign in to comment.