@@ -22,10 +22,12 @@ use std::cmp::Ordering;
2222use std:: collections:: HashMap ;
2323use std:: fmt:: { Display , Formatter } ;
2424use std:: hash:: Hash ;
25+ use std:: io:: Read as _;
2526use std:: sync:: Arc ;
2627
2728use _serde:: TableMetadataEnum ;
2829use chrono:: { DateTime , Utc } ;
30+ use flate2:: read:: GzDecoder ;
2931use serde:: { Deserialize , Serialize } ;
3032use serde_repr:: { Deserialize_repr , Serialize_repr } ;
3133use uuid:: Uuid ;
@@ -413,9 +415,24 @@ impl TableMetadata {
413415 file_io : & FileIO ,
414416 metadata_location : impl AsRef < str > ,
415417 ) -> Result < TableMetadata > {
416- let input_file = file_io. new_input ( metadata_location) ?;
418+ let input_file = file_io. new_input ( metadata_location. as_ref ( ) ) ?;
417419 let metadata_content = input_file. read ( ) . await ?;
418- let metadata = serde_json:: from_slice :: < TableMetadata > ( & metadata_content) ?;
420+
421+ // Check if the file is compressed by looking for the gzip "magic number".
422+ let metadata = if metadata_content. len ( ) > 2
423+ && metadata_content[ 0 ] == 0x1F
424+ && metadata_content[ 1 ] == 0x8B
425+ {
426+ let mut decoder = GzDecoder :: new ( metadata_content. as_ref ( ) ) ;
427+ let mut decompressed_data = Vec :: new ( ) ;
428+ decoder
429+ . read_to_end ( & mut decompressed_data)
430+ . map_err ( |e| Error :: new ( ErrorKind :: DataInvalid , e. to_string ( ) ) ) ?;
431+ serde_json:: from_slice ( & decompressed_data) ?
432+ } else {
433+ serde_json:: from_slice ( & metadata_content) ?
434+ } ;
435+
419436 Ok ( metadata)
420437 }
421438
@@ -1318,6 +1335,7 @@ impl SnapshotLog {
13181335mod tests {
13191336 use std:: collections:: HashMap ;
13201337 use std:: fs;
1338+ use std:: io:: Write as _;
13211339 use std:: sync:: Arc ;
13221340
13231341 use anyhow:: Result ;
@@ -3053,6 +3071,30 @@ mod tests {
30533071 assert_eq ! ( read_metadata, original_metadata) ;
30543072 }
30553073
3074+ #[ tokio:: test]
3075+ async fn test_table_metadata_read_compressed ( ) {
3076+ let temp_dir = TempDir :: new ( ) . unwrap ( ) ;
3077+ let metadata_location = temp_dir. path ( ) . join ( "v1.gz.metadata.json" ) ;
3078+
3079+ let original_metadata: TableMetadata = get_test_table_metadata ( "TableMetadataV2Valid.json" ) ;
3080+ let json = serde_json:: to_string ( & original_metadata) . unwrap ( ) ;
3081+
3082+ let mut encoder = flate2:: write:: GzEncoder :: new ( Vec :: new ( ) , flate2:: Compression :: default ( ) ) ;
3083+ encoder. write_all ( json. as_bytes ( ) ) . unwrap ( ) ;
3084+ std:: fs:: write ( & metadata_location, encoder. finish ( ) . unwrap ( ) )
3085+ . expect ( "failed to write metadata" ) ;
3086+
3087+ // Read the metadata back
3088+ let file_io = FileIOBuilder :: new_fs_io ( ) . build ( ) . unwrap ( ) ;
3089+ let metadata_location = metadata_location. to_str ( ) . unwrap ( ) ;
3090+ let read_metadata = TableMetadata :: read_from ( & file_io, metadata_location)
3091+ . await
3092+ . unwrap ( ) ;
3093+
3094+ // Verify the metadata matches
3095+ assert_eq ! ( read_metadata, original_metadata) ;
3096+ }
3097+
30563098 #[ tokio:: test]
30573099 async fn test_table_metadata_read_nonexistent_file ( ) {
30583100 // Create a FileIO instance
0 commit comments