|  | 
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
|  | 17 | + | 
|  | 18 | +use std::collections::HashMap; | 
|  | 19 | +use std::fs::File; | 
|  | 20 | +use std::io::{Read, Seek, SeekFrom}; | 
|  | 21 | + | 
|  | 22 | +use iceberg::Result; | 
|  | 23 | + | 
|  | 24 | +use crate::compression::CompressionCodec; | 
|  | 25 | +use crate::metadata::BlobMetadata; | 
|  | 26 | + | 
/// Blob type identifier string for version 1 Apache DataSketches Theta blobs.
///
/// NOTE(review): presumably this is the blob `type` name defined by the Puffin
/// file format specification — confirm against the spec.
pub const APACHE_DATASKETCHES_THETA_V1: &str = "apache-datasketches-theta-v1";
|  | 28 | + | 
|  | 29 | +#[derive(Debug)] | 
|  | 30 | +pub(crate) struct Blob { | 
|  | 31 | +    pub(crate) r#type: String, | 
|  | 32 | +    pub(crate) input_fields: Vec<i32>, | 
|  | 33 | +    pub(crate) snapshot_id: i64, | 
|  | 34 | +    pub(crate) sequence_number: i64, | 
|  | 35 | +    pub(crate) data: Vec<u8>, | 
|  | 36 | +    pub(crate) requested_compression_codec: Option<CompressionCodec>, | 
|  | 37 | +    pub(crate) properties: HashMap<String, String>, | 
|  | 38 | +} | 
|  | 39 | + | 
|  | 40 | +impl Blob { | 
|  | 41 | +    /// Reads blob from a Puffin file | 
|  | 42 | +    pub(crate) fn read(file: &mut File, blob_metadata: &BlobMetadata) -> Result<Blob> { | 
|  | 43 | +        let mut bytes = vec![0; blob_metadata.length]; | 
|  | 44 | +        file.seek(SeekFrom::Start(blob_metadata.offset))?; | 
|  | 45 | +        file.read(&mut bytes)?; | 
|  | 46 | +        let data = CompressionCodec::decompress(blob_metadata.compression_codec, bytes)?; | 
|  | 47 | +        return Ok(Blob { | 
|  | 48 | +            r#type: blob_metadata.r#type.clone(), | 
|  | 49 | +            input_fields: blob_metadata.input_fields.clone(), | 
|  | 50 | +            snapshot_id: blob_metadata.snapshot_id, | 
|  | 51 | +            sequence_number: blob_metadata.sequence_number, | 
|  | 52 | +            data, | 
|  | 53 | +            requested_compression_codec: Some(blob_metadata.compression_codec), | 
|  | 54 | +            properties: blob_metadata.properties.clone(), | 
|  | 55 | +        }); | 
|  | 56 | +    } | 
|  | 57 | +} | 
|  | 58 | + | 
#[cfg(test)]
mod tests {
    use crate::test_utils::test_utils::{
        read_blob_as_utf8_string, read_test_file, METRIC_BLOB_0, METRIC_BLOB_1,
    };

    // Each test opens one sample Puffin file and checks that blobs 0 and 1
    // decode to the expected metric strings. The fixtures cover both writers
    // (Rust and Java) and both storage forms (uncompressed and zstd).

    /// Rust-written file, blobs stored uncompressed.
    #[tokio::test]
    async fn test_read_rust_generated_uncompressed_metric_data() {
        let mut file = read_test_file("v1/rust-generated/sample-metric-data-uncompressed.bin");
        assert_eq!(read_blob_as_utf8_string(&mut file, 0), METRIC_BLOB_0);
        assert_eq!(read_blob_as_utf8_string(&mut file, 1), METRIC_BLOB_1);
    }

    /// Rust-written file, blobs compressed with zstd.
    #[tokio::test]
    async fn test_read_rust_generated_zstd_compressed_metric_data() {
        let mut file = read_test_file("v1/rust-generated/sample-metric-data-compressed-zstd.bin");
        assert_eq!(read_blob_as_utf8_string(&mut file, 0), METRIC_BLOB_0);
        assert_eq!(read_blob_as_utf8_string(&mut file, 1), METRIC_BLOB_1);
    }

    /// Java-written file, blobs stored uncompressed.
    #[tokio::test]
    async fn test_read_java_generated_uncompressed_metric_data() {
        let mut file = read_test_file("v1/java-generated/sample-metric-data-uncompressed.bin");
        assert_eq!(read_blob_as_utf8_string(&mut file, 0), METRIC_BLOB_0);
        assert_eq!(read_blob_as_utf8_string(&mut file, 1), METRIC_BLOB_1);
    }

    /// Java-written file, blobs compressed with zstd.
    #[tokio::test]
    async fn test_read_java_generated_zstd_compressed_metric_data() {
        let mut file = read_test_file("v1/java-generated/sample-metric-data-compressed-zstd.bin");
        assert_eq!(read_blob_as_utf8_string(&mut file, 0), METRIC_BLOB_0);
        assert_eq!(read_blob_as_utf8_string(&mut file, 1), METRIC_BLOB_1);
    }
}
0 commit comments