From 44bef22643d23cddf583d4e6e23b01ff1a7d0275 Mon Sep 17 00:00:00 2001 From: Maximilian Michels Date: Thu, 13 Nov 2025 15:44:09 +0100 Subject: [PATCH] Core: Classify RowDelta with data files only as APPEND When RowDelta only adds data files without adding delete files or deleting data files, it should be classified as an APPEND operation instead of OVERWRITE. This is similar to the existing logic in OverwriteFiles: https://github.com/apache/iceberg/blame/main/core/src/main/java/org/apache/iceberg/BaseOverwriteFiles.java#L56 --- .../java/org/apache/iceberg/BaseRowDelta.java | 4 ++++ .../java/org/apache/iceberg/TestRowDelta.java | 24 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java index b819d03dd5f8..92386ee42159 100644 --- a/core/src/main/java/org/apache/iceberg/BaseRowDelta.java +++ b/core/src/main/java/org/apache/iceberg/BaseRowDelta.java @@ -48,6 +48,10 @@ protected BaseRowDelta self() { @Override protected String operation() { + if (addsDataFiles() && !addsDeleteFiles() && !deletesDataFiles()) { + return DataOperations.APPEND; + } + if (addsDeleteFiles() && !addsDataFiles()) { return DataOperations.DELETE; } diff --git a/core/src/test/java/org/apache/iceberg/TestRowDelta.java b/core/src/test/java/org/apache/iceberg/TestRowDelta.java index 4d1d11081d4b..075314ea759a 100644 --- a/core/src/test/java/org/apache/iceberg/TestRowDelta.java +++ b/core/src/test/java/org/apache/iceberg/TestRowDelta.java @@ -76,6 +76,25 @@ public void addOnlyDeleteFilesProducesDeleteOperation() { assertThat(snap.deleteManifests(table.io())).hasSize(1); } + @TestTemplate + public void addOnlyDataFilesProducesAppendOperation() { + SnapshotUpdate rowDelta = table.newRowDelta().addRows(FILE_A).addRows(FILE_B); + + commit(table, rowDelta, branch); + Snapshot snap = latestSnapshot(table, branch); + assertThat(snap.sequenceNumber()).isEqualTo(1); + 
assertThat(snap.operation()).isEqualTo(DataOperations.APPEND); + assertThat(snap.dataManifests(table.io())).hasSize(1); + + validateManifest( + snap.dataManifests(table.io()).get(0), + dataSeqs(1L, 1L), + fileSeqs(1L, 1L), + ids(snap.snapshotId(), snap.snapshotId()), + files(FILE_A, FILE_B), + statuses(Status.ADDED, Status.ADDED)); + } + @TestTemplate public void testAddRemoveRows() { SnapshotUpdate rowDelta = @@ -599,6 +618,7 @@ public void testOverwriteWithRemoveRows() { long deltaSnapshotId = latestSnapshot(table, branch).snapshotId(); assertThat(latestSnapshot(table, branch).sequenceNumber()).isEqualTo(1); + assertThat(latestSnapshot(table, branch).operation()).isEqualTo(DataOperations.OVERWRITE); assertThat(table.ops().current().lastSequenceNumber()).isEqualTo(1); // overwriting by a filter will also remove delete files that match because all matching data @@ -642,6 +662,7 @@ public void testReplacePartitionsWithRemoveRows() { long deltaSnapshotId = latestSnapshot(table, branch).snapshotId(); assertThat(latestSnapshot(table, branch).sequenceNumber()).isEqualTo(1); + assertThat(latestSnapshot(table, branch).operation()).isEqualTo(DataOperations.OVERWRITE); assertThat(table.ops().current().lastSequenceNumber()).isEqualTo(1); // overwriting the partition will also remove delete files that match because all matching data @@ -688,6 +709,7 @@ public void testDeleteByExpressionWithRemoveRows() { branch); assertThat(latestSnapshot(table, branch).sequenceNumber()).isEqualTo(1); + assertThat(latestSnapshot(table, branch).operation()).isEqualTo(DataOperations.OVERWRITE); assertThat(table.ops().current().lastSequenceNumber()).isEqualTo(1); // deleting with a filter will also remove delete files that match because all matching data @@ -726,6 +748,7 @@ public void testDeleteDataFileWithRemoveRows() { long deltaSnapshotId = latestSnapshot(table, branch).snapshotId(); assertThat(latestSnapshot(table, branch).sequenceNumber()).isEqualTo(1); + assertThat(latestSnapshot(table, 
branch).operation()).isEqualTo(DataOperations.OVERWRITE); assertThat(table.ops().current().lastSequenceNumber()).isEqualTo(1); // deleting a specific data file will not affect a delete file in v2 or less @@ -786,6 +809,7 @@ public void testFastAppendDoesNotRemoveStaleDeleteFiles() { long deltaSnapshotId = latestSnapshot(table, branch).snapshotId(); assertThat(latestSnapshot(table, branch).sequenceNumber()).isEqualTo(1); + assertThat(latestSnapshot(table, branch).operation()).isEqualTo(DataOperations.OVERWRITE); assertThat(table.ops().current().lastSequenceNumber()).isEqualTo(1); // deleting a specific data file will not affect a delete file