apache · zzzzming95 · Oct 20, 2024 · zhangbutao · Oct 21, 2024
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -133,6 +133,7 @@
 import org.apache.orc.StripeInformation;
 import org.apache.orc.StripeStatistics;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.impl.BufferChunk;
 import org.apache.orc.impl.InStream;
 import org.apache.orc.impl.OrcTail;
 import org.apache.orc.impl.SchemaEvolution;
@@ -1669,8 +1670,13 @@ private void populateAndCacheStripeDetails() throws IOException {
             OrcFile.readerOptions(context.conf)
                 .filesystem(fs)
                 .maxLength(context.isAcid ? AcidUtils.getLogicalLength(fs, file) : file.getLen()))) {
-          orcTail = new OrcTail(orcReader.getFileTail(), orcReader.getSerializedFileFooter(),
-              file.getModificationTime());
+          BufferChunk bufferChunk = new BufferChunk(orcReader.getSerializedFileFooter(),
+                  getStripeStatisticsOffset(orcReader.getFileTail()));
+          orcTail = new OrcTail(orcReader.getFileTail(),
+                  bufferChunk,
+                  file.getModificationTime(),
+                  orcReader);
+
           if (context.cacheStripeDetails) {
             context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail);
           }
@@ -1724,6 +1730,20 @@ private void populateAndCacheStripeDetails() throws IOException {
       }
     }
 
+    /**
+     * Computes the metadata offset: file length minus one byte, the postscript, footer and metadata lengths.
+     */
+    private long getMetadataOffset(OrcProto.FileTail tail) {
+      OrcProto.PostScript postScript = tail.getPostscript();
+      long trailingBytes = 1L + tail.getPostscriptLength() + postScript.getFooterLength();
+      return tail.getFileLength() - trailingBytes - postScript.getMetadataLength();
+    }
+
+    /** Computes the stripe statistics offset: the metadata offset minus the stripe statistics length. */
+    private long getStripeStatisticsOffset(OrcProto.FileTail tail) {
+      return getMetadataOffset(tail) - tail.getPostscript().getStripeStatisticsLength();
+    }
+
     private long computeProjectionSize(List<OrcProto.Type> fileTypes,
           List<OrcProto.ColumnStatistics> stats, boolean[] fileIncluded) throws FileFormatException {
       // Exclude ORC <root> and ACID <row> struct elements to avoid full schema size estimation