Skip to content

Commit 6cce0e7

Browse files
Fix memory leak in Iceberg remove orphan files procedure
1 parent a0af241 commit 6cce0e7

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

plugin/trino-iceberg/src/main/java/io/trino/plugin/iceberg/IcebergMetadata.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,12 @@
206206
import java.util.HashSet;
207207
import java.util.Iterator;
208208
import java.util.LinkedHashMap;
209+
import java.util.LinkedList;
209210
import java.util.List;
210211
import java.util.Map;
211212
import java.util.Optional;
212213
import java.util.OptionalLong;
214+
import java.util.Queue;
213215
import java.util.Set;
214216
import java.util.concurrent.Callable;
215217
import java.util.concurrent.ConcurrentHashMap;
@@ -2251,7 +2253,9 @@ private void removeOrphanFiles(Table table, ConnectorSession session, SchemaTabl
22512253
ImmutableSet.Builder<String> validMetadataFileNames = ImmutableSet.builder();
22522254
ImmutableSet.Builder<String> validDataFileNames = ImmutableSet.builder();
22532255

2254-
for (Snapshot snapshot : table.snapshots()) {
2256+
Queue<Snapshot> allSnapshots = getAllSnapshots(table);
2257+
while (!allSnapshots.isEmpty()) {
2258+
Snapshot snapshot = allSnapshots.poll();
22552259
if (snapshot.manifestListLocation() != null) {
22562260
validMetadataFileNames.add(fileName(snapshot.manifestListLocation()));
22572261
}
@@ -4179,4 +4183,18 @@ private static TableStatistics mergeColumnStatistics(TableStatistics currentStat
41794183
newStats.getColumnStatistics().forEach(statisticsBuilder::setColumnStatistics);
41804184
return statisticsBuilder.build();
41814185
}
4186+
4187+
/**
4188+
* Table.snapshots() returns Iterable but the actual implementation returned is a List
4189+
* snapshot.allManifests() caches manifests, so by keeping a List around GC is prevented from collecting any in memory manifests which leaks memory
4190+
* Workaround: use Queue, Queue#poll will unlink old Snapshots allowing for GC during traversal
4191+
*/
4192+
private static Queue<Snapshot> getAllSnapshots(Table icebergTable)
4193+
{
4194+
Queue<Snapshot> allSnapshots = new LinkedList<>();
4195+
for (Snapshot snapshot : icebergTable.snapshots()) {
4196+
allSnapshots.add(snapshot);
4197+
}
4198+
return allSnapshots;
4199+
}
41824200
}

0 commit comments

Comments
 (0)