From 7d8d78ae4eaef7401e583d99fc45e89c4456fd00 Mon Sep 17 00:00:00 2001
From: zzzzming
Date: Sun, 20 Oct 2024 12:57:10 +0800
Subject: [PATCH] Add the parameter when orcTail is initialized

---
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 44 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 54d693c2fd43..e450ce8291be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -133,6 +133,7 @@
 import org.apache.orc.StripeInformation;
 import org.apache.orc.StripeStatistics;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.BufferChunk;
 import org.apache.orc.impl.InStream;
 import org.apache.orc.impl.OrcTail;
 import org.apache.orc.impl.SchemaEvolution;
@@ -1669,8 +1670,17 @@ private void populateAndCacheStripeDetails() throws IOException {
             OrcFile.readerOptions(context.conf)
                 .filesystem(fs)
                 .maxLength(context.isAcid ? AcidUtils.getLogicalLength(fs, file) : file.getLen()))) {
-          orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(),
-              file.getModificationTime());
+          // Fetch the tail once; it feeds both the offset computation and the OrcTail itself.
+          OrcProto.FileTail fileTail = orcReader.getFileTail();
+          BufferChunk bufferChunk = new BufferChunk(orcReader.getSerializedFileFooter(),
+              getStripeStatisticsOffset(fileTail));
+          // NOTE(review): orcReader looks scoped to the enclosing block, whose header is outside
+          // this hunk; confirm the OrcTail (cached below) never uses it after it is closed.
+          orcTail = new OrcTail(fileTail,
+              bufferChunk,
+              file.getModificationTime(),
+              orcReader);
+
           if (context.cacheStripeDetails) {
             context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail);
           }
@@ -1724,6 +1734,36 @@ private void populateAndCacheStripeDetails() throws IOException {
       }
     }
 
+    /**
+     * Returns the file length minus one, the postscript length, the footer length and the
+     * metadata length, which is presumably the position where the metadata section begins.
+     * The subtracted {@code 1} is presumably the final byte that stores the postscript
+     * length; confirm both against the ORC file layout.
+     *
+     * @param tail file tail providing the file length and the section lengths
+     * @return the computed offset
+     */
+    private long getMetadataOffset(OrcProto.FileTail tail) {
+      OrcProto.PostScript ps = tail.getPostscript();
+      return tail.getFileLength()
+          - 1
+          - tail.getPostscriptLength()
+          - ps.getFooterLength()
+          - ps.getMetadataLength();
+    }
+
+    /**
+     * Returns the metadata offset reduced by the stripe statistics length, which is
+     * presumably the position where the stripe statistics section begins.
+     *
+     * @param tail file tail providing the file length and the section lengths
+     * @return the computed offset
+     */
+    private long getStripeStatisticsOffset(OrcProto.FileTail tail) {
+      OrcProto.PostScript ps = tail.getPostscript();
+      return getMetadataOffset(tail) - ps.getStripeStatisticsLength();
+    }
+
     private long computeProjectionSize(List fileTypes,
         List stats, boolean[] fileIncluded) throws FileFormatException {
       // Exclude ORC and ACID struct elements to avoid full schema size estimation