Skip to content

Commit bd932d6

Browse files
committed
remove stats field from remove actions
1 parent 7b43b35 commit bd932d6

File tree

3 files changed

+6
-50
lines changed

3 files changed

+6
-50
lines changed

kernel/src/actions/mod.rs

-7
Original file line numberDiff line numberDiff line change
@@ -442,12 +442,6 @@ struct Remove {
442442
#[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))]
443443
pub(crate) size: Option<i64>,
444444

445-
/// Contains [statistics] (e.g., count, min/max values for columns) about the data in this logical file encoded as a JSON string.
446-
///
447-
/// [statistics]: https://github.com/delta-io/delta/blob/master/PROTOCOL.md#Per-file-Statistics
448-
#[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))]
449-
pub(crate) stats: Option<String>,
450-
451445
/// Map containing metadata about this logical file.
452446
#[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))]
453447
pub(crate) tags: Option<HashMap<String, String>>,
@@ -639,7 +633,6 @@ mod tests {
639633
StructField::new("extendedFileMetadata", DataType::BOOLEAN, true),
640634
partition_values_field(),
641635
StructField::new("size", DataType::LONG, true),
642-
StructField::new("stats", DataType::STRING, true),
643636
tags_field(),
644637
deletion_vector_field(),
645638
StructField::new("baseRowId", DataType::LONG, true),

kernel/src/actions/visitors.rs

+5-42
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ impl RemoveVisitor {
255255
getters: &[&'a dyn GetData<'a>],
256256
) -> DeltaResult<Remove> {
257257
require!(
258-
getters.len() == 15,
258+
getters.len() == 14,
259259
Error::InternalError(format!(
260260
"Wrong number of RemoveVisitor getters: {}",
261261
getters.len()
@@ -272,15 +272,13 @@ impl RemoveVisitor {
272272

273273
let size: Option<i64> = getters[5].get_opt(row_index, "remove.size")?;
274274

275-
let stats: Option<String> = getters[6].get_opt(row_index, "remove.stats")?;
276-
277-
// TODO(nick) tags are skipped in getters[7]
275+
// TODO(nick) tags are skipped in getters[6]
278276

279-
let deletion_vector = visit_deletion_vector_at(row_index, &getters[8..])?;
277+
let deletion_vector = visit_deletion_vector_at(row_index, &getters[7..])?;
280278

281-
let base_row_id: Option<i64> = getters[13].get_opt(row_index, "remove.baseRowId")?;
279+
let base_row_id: Option<i64> = getters[12].get_opt(row_index, "remove.baseRowId")?;
282280
let default_row_commit_version: Option<i64> =
283-
getters[14].get_opt(row_index, "remove.defaultRowCommitVersion")?;
281+
getters[13].get_opt(row_index, "remove.defaultRowCommitVersion")?;
284282

285283
Ok(Remove {
286284
path,
@@ -289,7 +287,6 @@ impl RemoveVisitor {
289287
extended_file_metadata,
290288
partition_values,
291289
size,
292-
stats,
293290
tags: None,
294291
deletion_vector,
295292
base_row_id,
@@ -635,40 +632,6 @@ mod tests {
635632
}
636633
}
637634

638-
#[test]
639-
fn test_parse_remove() {
640-
let engine = SyncEngine::new();
641-
let json_handler = engine.get_json_handler();
642-
let json_strings: StringArray = vec![
643-
r#"{"commitInfo":{"timestamp":1670892998177,"operation":"DELETE","operationParameters":{"mode":"Append"},"isolationLevel":"Serializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1356"},"engineInfo":"Apache-Spark/3.3.1 Delta-Lake/2.2.0","txnId":"046a258f-45e3-4657-b0bf-abfb0f76681c"}}"#,
644-
r#"{"remove":{"path":"part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet","deletionTimestamp":1670892998135,"dataChange":true,"size":452,"stats":"{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}"}}"#,
645-
]
646-
.into();
647-
let output_schema = get_log_schema().clone();
648-
let batch = json_handler
649-
.parse_json(string_array_to_engine_data(json_strings), output_schema)
650-
.unwrap();
651-
let mut remove_visitor = RemoveVisitor::default();
652-
remove_visitor.visit_rows_of(batch.as_ref()).unwrap();
653-
let expected_remove: Remove = Remove {
654-
path: "part-00003-f525f459-34f9-46f5-82d6-d42121d883fd.c000.snappy.parquet".into(),
655-
deletion_timestamp: Some(1670892998135),
656-
data_change: true,
657-
size: Some(452),
658-
stats: Some("{\"numRecords\":1,\"minValues\":{\"c3\":5},\"maxValues\":{\"c3\":5},\"nullCount\":{\"c3\":0}}".into()),
659-
..Default::default()
660-
};
661-
assert_eq!(
662-
remove_visitor.removes.len(),
663-
1,
664-
"Unexpected number of remove actions"
665-
);
666-
assert_eq!(
667-
remove_visitor.removes[0], expected_remove,
668-
"Unexpected remove action"
669-
);
670-
}
671-
672635
#[test]
673636
fn test_parse_remove_partitioned() {
674637
let engine = SyncEngine::new();

kernel/src/schema.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ pub struct StructType {
223223
pub type_name: String,
224224
/// The type of element stored in this array
225225
// We use indexmap to preserve the order of fields as they are defined in the schema
226-
// while also allowing for fast lookup by name. The alternative to do a linear search
226+
// while also allowing for fast lookup by name. The alternative is to do a linear search
227227
// for each field by name, which would be potentially quite expensive for large schemas.
228228
pub fields: IndexMap<String, StructField>,
229229
}

0 commit comments

Comments
 (0)