- 
                Notifications
    You must be signed in to change notification settings 
- Fork 320
          Add cleanup support for partition-level statistics files when DROP TABLE PURGE
          #1508
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -30,10 +30,12 @@ | |
| import org.apache.iceberg.FileFormat; | ||
| import org.apache.iceberg.GenericBlobMetadata; | ||
| import org.apache.iceberg.GenericStatisticsFile; | ||
| import org.apache.iceberg.ImmutableGenericPartitionStatisticsFile; | ||
| import org.apache.iceberg.ManifestFile; | ||
| import org.apache.iceberg.ManifestFiles; | ||
| import org.apache.iceberg.ManifestWriter; | ||
| import org.apache.iceberg.PartitionSpec; | ||
| import org.apache.iceberg.PartitionStatisticsFile; | ||
| import org.apache.iceberg.Schema; | ||
| import org.apache.iceberg.Snapshot; | ||
| import org.apache.iceberg.SortOrder; | ||
|  | @@ -71,7 +73,7 @@ static ManifestFile manifestFile( | |
|  | ||
| static TableMetadata writeTableMetadata(FileIO fileIO, String metadataFile, Snapshot... snapshots) | ||
| throws IOException { | ||
| return writeTableMetadata(fileIO, metadataFile, null, null, null, snapshots); | ||
| return writeTableMetadata(fileIO, metadataFile, null, null, null, null, snapshots); | ||
| } | ||
|  | ||
| static TableMetadata writeTableMetadata( | ||
|  | @@ -80,7 +82,18 @@ static TableMetadata writeTableMetadata( | |
| List<StatisticsFile> statisticsFiles, | ||
| Snapshot... snapshots) | ||
| throws IOException { | ||
| return writeTableMetadata(fileIO, metadataFile, null, null, statisticsFiles, snapshots); | ||
| return writeTableMetadata(fileIO, metadataFile, null, null, statisticsFiles, null, snapshots); | ||
| } | ||
| /** Overload taking both table-level and partition-level statistics files; forwards to the seven-argument writeTableMetadata with null for the third and fourth arguments (presumably prevMetadata and prevMetadataFile; confirm against the full signature). */ | ||
| static TableMetadata writeTableMetadata( | ||
| FileIO fileIO, | ||
| String metadataFile, | ||
| List<StatisticsFile> statisticsFiles, | ||
| List<PartitionStatisticsFile> partitionStatsFiles, | ||
| Snapshot... snapshots) | ||
| throws IOException { | ||
| return writeTableMetadata( | ||
| fileIO, metadataFile, null, null, statisticsFiles, partitionStatsFiles, snapshots); | ||
| } | ||
|  | ||
| static TableMetadata writeTableMetadata( | ||
|  | @@ -89,6 +102,7 @@ static TableMetadata writeTableMetadata( | |
| TableMetadata prevMetadata, | ||
| String prevMetadataFile, | ||
| List<StatisticsFile> statisticsFiles, | ||
| List<PartitionStatisticsFile> partitionStatsFiles, | ||
| Snapshot... snapshots) | ||
| throws IOException { | ||
| TableMetadata.Builder tmBuilder; | ||
|  | @@ -106,11 +120,15 @@ static TableMetadata writeTableMetadata( | |
| .addPartitionSpec(PartitionSpec.unpartitioned()); | ||
|  | ||
| int statisticsFileIndex = 0; | ||
| int partitionStatsFileIndex = 0; | ||
| for (Snapshot snapshot : snapshots) { | ||
| tmBuilder.addSnapshot(snapshot); | ||
| if (statisticsFiles != null) { | ||
| tmBuilder.setStatistics(statisticsFiles.get(statisticsFileIndex++)); | ||
| } | ||
| if (partitionStatsFiles != null) { | ||
| tmBuilder.setPartitionStatistics(partitionStatsFiles.get(partitionStatsFileIndex++)); | ||
| } | ||
| } | ||
| TableMetadata tableMetadata = tmBuilder.build(); | ||
| PositionOutputStream out = fileIO.newOutputFile(metadataFile).createOrOverwrite(); | ||
|  | @@ -161,4 +179,26 @@ public static StatisticsFile writeStatsFile( | |
| puffinWriter.writtenBlobsMetadata().stream().map(GenericBlobMetadata::from).toList()); | ||
| } | ||
| } | ||
| /** Writes a Puffin file holding one placeholder blob to statsLocation through fileIO, then returns a PartitionStatisticsFile recording snapshotId, statsLocation as the path, and the writer's reported file size. Declares IOException. */ | ||
| public static PartitionStatisticsFile writePartitionStatsFile( | ||
| long snapshotId, long snapshotSequenceNumber, String statsLocation, FileIO fileIO) | ||
| throws IOException { | ||
| /* The writer is opened in try-with-resources, so it is closed when the block exits. */ | ||
| try (PuffinWriter puffinWriter = Puffin.write(fileIO.newOutputFile(statsLocation)).build()) { | ||
| /* Add a single blob with fixed type, field list and content; only the snapshot id and sequence number come from the arguments. */ | ||
| puffinWriter.add( | ||
| new Blob( | ||
| "some-blob-type", | ||
| List.of(1), | ||
| snapshotId, | ||
| snapshotSequenceNumber, | ||
| ByteBuffer.wrap("blob content".getBytes(StandardCharsets.UTF_8)))); | ||
| puffinWriter.finish(); | ||
| /* finish() is called before fileSize() is read; presumably the size is only final after finish (confirm against the PuffinWriter documentation). */ | ||
| return ImmutableGenericPartitionStatisticsFile.builder() | ||
| .snapshotId(snapshotId) | ||
| .path(statsLocation) | ||
| .fileSizeInBytes(puffinWriter.fileSize()) | ||
| .build(); | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -25,6 +25,7 @@ | |
| import java.util.stream.Collectors; | ||
| import java.util.stream.Stream; | ||
| import org.apache.iceberg.ManifestFile; | ||
| import org.apache.iceberg.PartitionStatisticsFile; | ||
| import org.apache.iceberg.Snapshot; | ||
| import org.apache.iceberg.StatisticsFile; | ||
| import org.apache.iceberg.TableMetadata; | ||
|  | @@ -112,7 +113,6 @@ public boolean handleTask(TaskEntity cleanupTask, CallContext callContext) { | |
| metaStoreManager, | ||
| polarisCallContext); | ||
|  | ||
| // TODO: handle partition statistics files | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like these weren't left out due to an oversight but rather they were intentionally excluded. I'm curious if there is any background on why that is -- is there some specific pitfall related to cleaning up the partition stats? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 Good catch — to the best of my knowledge, Iceberg will delete all file types under the metadata/ directory, including manifests, manifest lists, metadata files, previous metadata, and .stats files (both table and partition-level). Iceberg code pointer: CatalogUtil.java#L124 for reference. This gap was also discussed earlier in this issue comment. Happy to learn more if there’s additional context I missed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see, interesting. Thanks for sharing that link and that context | ||
| Stream<TaskEntity> metadataFileCleanupTasks = | ||
| getMetadataTaskStream( | ||
| cleanupTask, | ||
|  | @@ -243,12 +243,13 @@ private Stream<TaskEntity> getMetadataTaskStream( | |
| private List<List<String>> getMetadataFileBatches(TableMetadata tableMetadata, int batchSize) { | ||
| List<List<String>> result = new ArrayList<>(); | ||
| List<String> metadataFiles = | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it really accurate to call puffin files metadata files, and is it necessarily correct to group all of these together? I guess the intent here is to collect all of the not-data files? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 
 Good point — the original intention was to group files under the metadata/ directory to reduce the overhead of scheduling tasks. As more file types like stats and partition stats were added later, nonDataFiles (or something similar) might now better reflect what’s being collected. Curious to hear your thoughts — would it be clearer to separate them, or is the performance benefit of grouping still preferred? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see... continuing to separate out the data files makes sense to me. I think this came up on one of the previous PRs, but the real solution here needs to eventually involve moving this purge work out of the catalog server and into the maintenance service where we handle compaction etc. That's the only way to really achieve scalability. | ||
| Stream.concat( | ||
| Stream.concat( | ||
| tableMetadata.previousFiles().stream() | ||
| .map(TableMetadata.MetadataLogEntry::file), | ||
| tableMetadata.snapshots().stream().map(Snapshot::manifestListLocation)), | ||
| tableMetadata.statisticsFiles().stream().map(StatisticsFile::path)) | ||
| Stream.of( | ||
| tableMetadata.previousFiles().stream().map(TableMetadata.MetadataLogEntry::file), | ||
| tableMetadata.snapshots().stream().map(Snapshot::manifestListLocation), | ||
| tableMetadata.statisticsFiles().stream().map(StatisticsFile::path), | ||
| tableMetadata.partitionStatisticsFiles().stream() | ||
| .map(PartitionStatisticsFile::path)) | ||
| .flatMap(s -> s) | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does this line achieve? Does  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. 
 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh that makes sense - I missed the change to Stream.of | ||
| .toList(); | ||
|  | ||
| for (int i = 0; i < metadataFiles.size(); i += batchSize) { | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only the secondMetadata.partitionStatisticsFiles() is enough here, as it contains the entries for all the snapshots?
Similar to statisticsFiles(), which exists already.