-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Snapshot::try_new_from()
API
#549
Changes from 17 commits
f8bc074
3d37288
5480711
2686fb3
0daf1e9
692c9d5
f18e380
8d3357b
3bf3d67
453db1b
ce31322
f1578fb
ee61b75
fea0f76
81f61ae
7b0bd1c
691b23a
27c4a95
66e44d2
46ab944
5a582d6
592667b
75b6178
5cdde76
a94ddef
e8e327d
d5bcb67
be88b88
d396953
6e49bc2
032d72b
cb06927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -37,7 +37,7 @@ mod tests; | |||||
/// and in `TableChanges` when built with [`LogSegment::for_table_changes`]. | ||||||
/// | ||||||
/// [`Snapshot`]: crate::snapshot::Snapshot | ||||||
#[derive(Debug)] | ||||||
#[derive(Debug, Clone, PartialEq, Eq)] | ||||||
#[cfg_attr(feature = "developer-visibility", visibility::make(pub))] | ||||||
pub(crate) struct LogSegment { | ||||||
pub end_version: Version, | ||||||
|
@@ -49,12 +49,17 @@ pub(crate) struct LogSegment { | |||||
} | ||||||
|
||||||
impl LogSegment { | ||||||
fn try_new( | ||||||
ascending_commit_files: Vec<ParsedLogPath>, | ||||||
pub(crate) fn try_new( | ||||||
mut ascending_commit_files: Vec<ParsedLogPath>, | ||||||
checkpoint_parts: Vec<ParsedLogPath>, | ||||||
log_root: Url, | ||||||
end_version: Option<Version>, | ||||||
) -> DeltaResult<Self> { | ||||||
// Commit file versions must be greater than the most recent checkpoint version if it exists | ||||||
if let Some(checkpoint_file) = checkpoint_parts.first() { | ||||||
ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); | ||||||
} | ||||||
|
||||||
// We require that commits are contiguous. In other words, there must be no gap between commit versions. | ||||||
require!( | ||||||
ascending_commit_files | ||||||
|
@@ -81,22 +86,23 @@ impl LogSegment { | |||||
} | ||||||
|
||||||
// Get the effective version from chosen files | ||||||
let version_eff = ascending_commit_files | ||||||
let effective_version = ascending_commit_files | ||||||
.last() | ||||||
.or(checkpoint_parts.first()) | ||||||
.ok_or(Error::generic("No files in log segment"))? | ||||||
.version; | ||||||
if let Some(end_version) = end_version { | ||||||
require!( | ||||||
version_eff == end_version, | ||||||
effective_version == end_version, | ||||||
Error::generic(format!( | ||||||
"LogSegment end version {} not the same as the specified end version {}", | ||||||
version_eff, end_version | ||||||
effective_version, end_version | ||||||
)) | ||||||
); | ||||||
} | ||||||
|
||||||
Ok(LogSegment { | ||||||
end_version: version_eff, | ||||||
end_version: effective_version, | ||||||
log_root, | ||||||
ascending_commit_files, | ||||||
checkpoint_parts, | ||||||
|
@@ -122,7 +128,7 @@ impl LogSegment { | |||||
) -> DeltaResult<Self> { | ||||||
let time_travel_version = time_travel_version.into(); | ||||||
|
||||||
let (mut ascending_commit_files, checkpoint_parts) = | ||||||
let (ascending_commit_files, checkpoint_parts) = | ||||||
match (checkpoint_hint.into(), time_travel_version) { | ||||||
(Some(cp), None) => { | ||||||
list_log_files_with_checkpoint(&cp, fs_client, &log_root, None)? | ||||||
|
@@ -133,11 +139,6 @@ impl LogSegment { | |||||
_ => list_log_files_with_version(fs_client, &log_root, None, time_travel_version)?, | ||||||
}; | ||||||
|
||||||
// Commit file versions must be greater than the most recent checkpoint version if it exists | ||||||
if let Some(checkpoint_file) = checkpoint_parts.first() { | ||||||
ascending_commit_files.retain(|log_path| checkpoint_file.version < log_path.version); | ||||||
} | ||||||
|
||||||
LogSegment::try_new( | ||||||
ascending_commit_files, | ||||||
checkpoint_parts, | ||||||
|
@@ -362,8 +363,12 @@ impl LogSegment { | |||||
)?)) | ||||||
} | ||||||
|
||||||
// Get the most up-to-date Protocol and Metadata actions | ||||||
pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> { | ||||||
// Do a lightweight protocol+metadata log replay to find the latest Protocol and Metadata in | ||||||
// the LogSegment | ||||||
pub(crate) fn protocol_and_metadata( | ||||||
&self, | ||||||
engine: &dyn Engine, | ||||||
) -> DeltaResult<(Option<Metadata>, Option<Protocol>)> { | ||||||
let data_batches = self.replay_for_metadata(engine)?; | ||||||
let (mut metadata_opt, mut protocol_opt) = (None, None); | ||||||
for batch in data_batches { | ||||||
|
@@ -379,7 +384,12 @@ impl LogSegment { | |||||
break; | ||||||
} | ||||||
} | ||||||
match (metadata_opt, protocol_opt) { | ||||||
Ok((metadata_opt, protocol_opt)) | ||||||
} | ||||||
|
||||||
// Get the most up-to-date Protocol and Metadata actions | ||||||
pub(crate) fn read_metadata(&self, engine: &dyn Engine) -> DeltaResult<(Metadata, Protocol)> { | ||||||
match self.protocol_and_metadata(engine)? { | ||||||
(Some(m), Some(p)) => Ok((m, p)), | ||||||
(None, Some(_)) => Err(Error::MissingMetadata), | ||||||
(Some(_), None) => Err(Error::MissingProtocol), | ||||||
|
@@ -403,6 +413,11 @@ impl LogSegment { | |||||
// read the same protocol and metadata schema for both commits and checkpoints | ||||||
self.read_actions(engine, schema.clone(), schema, META_PREDICATE.clone()) | ||||||
} | ||||||
|
||||||
/// Return whether or not the LogSegment contains a checkpoint. | ||||||
pub(crate) fn has_checkpoint(&self) -> bool { | ||||||
!self.checkpoint_parts.is_empty() | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Or just do that at the one call site, instead of defining a helper at all.) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, realized it's probably not necessary, though I've kept the `!is_empty()` since it's a vec. |
||||||
} | ||||||
} | ||||||
|
||||||
/// Returns a fallible iterator of [`ParsedLogPath`] that are between the provided `start_version` (inclusive) | ||||||
|
@@ -432,12 +447,13 @@ fn list_log_files( | |||||
Err(_) => true, | ||||||
})) | ||||||
} | ||||||
|
||||||
/// List all commit and checkpoint files with versions greater than or equal to the provided `start_version`.
/// If successful, this returns a tuple `(ascending_commit_files, checkpoint_parts)` of type | ||||||
/// `(Vec<ParsedLogPath>, Vec<ParsedLogPath>)`. The commit files are guaranteed to be sorted in | ||||||
/// ascending order by version. The elements of `checkpoint_parts` are all the parts of the same | ||||||
/// checkpoint. Checkpoint parts share the same version. | ||||||
fn list_log_files_with_version( | ||||||
pub(crate) fn list_log_files_with_version( | ||||||
fs_client: &dyn FileSystemClient, | ||||||
log_root: &Url, | ||||||
start_version: Option<Version>, | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only call site did this right before `try_new`, so we should probably just bake it in here; then, when we use this in `Snapshot::try_new_from`, we get the guarantee.