|  | 
|  | 1 | +// Licensed to the Apache Software Foundation (ASF) under one | 
|  | 2 | +// or more contributor license agreements.  See the NOTICE file | 
|  | 3 | +// distributed with this work for additional information | 
|  | 4 | +// regarding copyright ownership.  The ASF licenses this file | 
|  | 5 | +// to you under the Apache License, Version 2.0 (the | 
|  | 6 | +// "License"); you may not use this file except in compliance | 
|  | 7 | +// with the License.  You may obtain a copy of the License at | 
|  | 8 | +// | 
|  | 9 | +//   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 10 | +// | 
|  | 11 | +// Unless required by applicable law or agreed to in writing, | 
|  | 12 | +// software distributed under the License is distributed on an | 
|  | 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | 14 | +// KIND, either express or implied.  See the License for the | 
|  | 15 | +// specific language governing permissions and limitations | 
|  | 16 | +// under the License. | 
|  | 17 | + | 
|  | 18 | +use iceberg::{Error, ErrorKind, Result}; | 
|  | 19 | +use serde_derive::{Deserialize, Serialize}; | 
|  | 20 | + | 
|  | 21 | +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone, Copy)] | 
|  | 22 | +#[serde(rename_all = "lowercase")] | 
|  | 23 | +#[derive(Default)] | 
|  | 24 | +pub enum CompressionCodec { | 
|  | 25 | +    /** No compression */ | 
|  | 26 | +    #[default] | 
|  | 27 | +    None, | 
|  | 28 | +    /** LZ4 single compression frame with content size present */ | 
|  | 29 | +    Lz4, | 
|  | 30 | +    /** Zstandard single compression frame with content size present */ | 
|  | 31 | +    Zstd, | 
|  | 32 | +} | 
|  | 33 | + | 
|  | 34 | +impl CompressionCodec { | 
|  | 35 | +    pub(crate) fn decompress(&self, bytes: Vec<u8>) -> Result<Vec<u8>> { | 
|  | 36 | +        match self { | 
|  | 37 | +            CompressionCodec::None => Ok(bytes), | 
|  | 38 | +            CompressionCodec::Lz4 => Err(Error::new( | 
|  | 39 | +                ErrorKind::FeatureUnsupported, | 
|  | 40 | +                "LZ4 decompression is not supported currently", | 
|  | 41 | +            )), | 
|  | 42 | +            CompressionCodec::Zstd => { | 
|  | 43 | +                let decompressed = zstd::stream::decode_all(&bytes[..])?; | 
|  | 44 | +                Ok(decompressed) | 
|  | 45 | +            } | 
|  | 46 | +        } | 
|  | 47 | +    } | 
|  | 48 | + | 
|  | 49 | +    pub(crate) fn compress(&self, bytes: Vec<u8>) -> Result<Vec<u8>> { | 
|  | 50 | +        match self { | 
|  | 51 | +            CompressionCodec::None => Ok(bytes), | 
|  | 52 | +            CompressionCodec::Lz4 => Err(Error::new( | 
|  | 53 | +                ErrorKind::FeatureUnsupported, | 
|  | 54 | +                "LZ4 compression is not supported currently", | 
|  | 55 | +            )), | 
|  | 56 | +            CompressionCodec::Zstd => { | 
|  | 57 | +                let writer = Vec::<u8>::new(); | 
|  | 58 | +                let mut encoder = zstd::stream::Encoder::new(writer, 3)?; | 
|  | 59 | +                encoder.include_checksum(true)?; | 
|  | 60 | +                encoder.set_pledged_src_size(Some(bytes.len().try_into()?))?; | 
|  | 61 | +                std::io::copy(&mut &bytes[..], &mut encoder)?; | 
|  | 62 | +                let compressed = encoder.finish()?; | 
|  | 63 | +                Ok(compressed) | 
|  | 64 | +            } | 
|  | 65 | +        } | 
|  | 66 | +    } | 
|  | 67 | + | 
|  | 68 | +    pub(crate) fn is_none(&self) -> bool { | 
|  | 69 | +        matches!(self, CompressionCodec::None) | 
|  | 70 | +    } | 
|  | 71 | +} | 
|  | 72 | + | 
#[cfg(test)]
mod tests {
    use crate::compression::CompressionCodec;

    // These tests are fully synchronous, so they use the plain `#[test]`
    // harness rather than starting an async runtime for each case.

    /// The `None` codec must return its input unchanged in both directions.
    #[test]
    fn test_compression_codec_none() {
        let compression_codec = CompressionCodec::None;
        let bytes_vec = vec![0_u8; 100];

        let compressed = compression_codec.compress(bytes_vec.clone()).unwrap();
        assert_eq!(bytes_vec, compressed);

        let decompressed = compression_codec.decompress(compressed.clone()).unwrap();
        assert_eq!(compressed, decompressed);
    }

    /// LZ4 is not implemented: both directions must report `FeatureUnsupported`.
    #[test]
    fn test_compression_codec_lz4() {
        let compression_codec = CompressionCodec::Lz4;
        let bytes_vec = vec![0_u8; 100];

        assert_eq!(
            compression_codec
                .compress(bytes_vec.clone())
                .unwrap_err()
                .to_string(),
            "FeatureUnsupported => LZ4 compression is not supported currently",
        );

        // Last use of the input, so it is moved instead of cloned.
        assert_eq!(
            compression_codec
                .decompress(bytes_vec)
                .unwrap_err()
                .to_string(),
            "FeatureUnsupported => LZ4 decompression is not supported currently",
        );
    }

    /// Zstandard must shrink a run of 100 zero bytes and round-trip it exactly.
    #[test]
    fn test_compression_codec_zstd() {
        let compression_codec = CompressionCodec::Zstd;
        let bytes_vec = vec![0_u8; 100];

        let compressed = compression_codec.compress(bytes_vec.clone()).unwrap();
        assert!(compressed.len() < bytes_vec.len());

        // `compressed` is not needed afterwards, so it is moved into the call.
        let decompressed = compression_codec.decompress(compressed).unwrap();
        assert_eq!(decompressed, bytes_vec);
    }
}
0 commit comments