Skip to content

Commit 1038a5b

Browse files
committed
Make Puffin APIs public
1 parent 04a0d07 commit 1038a5b

File tree

6 files changed: 40 additions and 36 deletions.

crates/iceberg/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,4 +88,4 @@ mod utils;
8888
pub mod writer;
8989

9090
mod delete_vector;
91-
mod puffin;
91+
pub mod puffin;

crates/iceberg/src/puffin/blob.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@
1818
use std::collections::HashMap;
1919

2020
/// A serialized form of a "compact" Theta sketch produced by the Apache DataSketches library.
21-
pub(crate) const APACHE_DATASKETCHES_THETA_V1: &str = "apache-datasketches-theta-v1";
21+
pub const APACHE_DATASKETCHES_THETA_V1: &str = "apache-datasketches-theta-v1";
2222

2323
/// The blob
2424
#[derive(Debug, PartialEq, Clone)]
25-
pub(crate) struct Blob {
25+
pub struct Blob {
2626
/// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types
27-
pub(crate) r#type: String,
27+
pub r#type: String,
2828
/// List of field IDs the blob was computed for; the order of items is used to compute sketches stored in the blob.
29-
pub(crate) fields: Vec<i32>,
29+
pub fields: Vec<i32>,
3030
/// ID of the Iceberg table's snapshot the blob was computed from
31-
pub(crate) snapshot_id: i64,
31+
pub snapshot_id: i64,
3232
/// Sequence number of the Iceberg table's snapshot the blob was computed from
33-
pub(crate) sequence_number: i64,
33+
pub sequence_number: i64,
3434
/// The uncompressed blob data
35-
pub(crate) data: Vec<u8>,
35+
pub data: Vec<u8>,
3636
/// Arbitrary meta-information about the blob
37-
pub(crate) properties: HashMap<String, String>,
37+
pub properties: HashMap<String, String>,
3838
}

crates/iceberg/src/puffin/metadata.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,33 +26,33 @@ use crate::{Error, ErrorKind, Result};
2626

2727
/// Human-readable identification of the application writing the file, along with its version.
2828
/// Example: "Trino version 381"
29-
pub(crate) const CREATED_BY_PROPERTY: &str = "created-by";
29+
pub const CREATED_BY_PROPERTY: &str = "created-by";
3030

3131
/// Metadata about a blob.
3232
/// For more information, see: https://iceberg.apache.org/puffin-spec/#blobmetadata
3333
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
3434
#[serde(rename_all = "kebab-case")]
35-
pub(crate) struct BlobMetadata {
35+
pub struct BlobMetadata {
3636
/// See blob types: https://iceberg.apache.org/puffin-spec/#blob-types
37-
pub(crate) r#type: String,
37+
pub r#type: String,
3838
/// List of field IDs the blob was computed for; the order of items is used to compute sketches stored in the blob.
39-
pub(crate) fields: Vec<i32>,
39+
pub fields: Vec<i32>,
4040
/// ID of the Iceberg table's snapshot the blob was computed from
41-
pub(crate) snapshot_id: i64,
41+
pub snapshot_id: i64,
4242
/// Sequence number of the Iceberg table's snapshot the blob was computed from
43-
pub(crate) sequence_number: i64,
43+
pub sequence_number: i64,
4444
/// The offset in the file where the blob contents start
45-
pub(crate) offset: u64,
45+
pub offset: u64,
4646
/// The length of the blob stored in the file (after compression, if compressed)
47-
pub(crate) length: u64,
47+
pub length: u64,
4848
/// The compression codec used to compress the data
4949
#[serde(skip_serializing_if = "CompressionCodec::is_none")]
5050
#[serde(default)]
51-
pub(crate) compression_codec: CompressionCodec,
51+
pub compression_codec: CompressionCodec,
5252
/// Arbitrary meta-information about the blob
5353
#[serde(skip_serializing_if = "HashMap::is_empty")]
5454
#[serde(default)]
55-
pub(crate) properties: HashMap<String, String>,
55+
pub properties: HashMap<String, String>,
5656
}
5757

5858
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
@@ -91,13 +91,13 @@ impl Flag {
9191
/// Metadata about a puffin file.
9292
/// For more information, see: https://iceberg.apache.org/puffin-spec/#filemetadata
9393
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
94-
pub(crate) struct FileMetadata {
94+
pub struct FileMetadata {
9595
/// Metadata about blobs in file
96-
pub(crate) blobs: Vec<BlobMetadata>,
96+
pub blobs: Vec<BlobMetadata>,
9797
/// Arbitrary meta-information, like writer identification/version.
9898
#[serde(skip_serializing_if = "HashMap::is_empty")]
9999
#[serde(default)]
100-
pub(crate) properties: HashMap<String, String>,
100+
pub properties: HashMap<String, String>,
101101
}
102102

103103
impl FileMetadata {

crates/iceberg/src/puffin/mod.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,23 @@
1818
//! Iceberg Puffin implementation.
1919
2020
#![deny(missing_docs)]
21-
// Temporarily allowing this while crate is under active development
22-
#![allow(dead_code)]
2321

2422
mod blob;
23+
pub use blob::{Blob, APACHE_DATASKETCHES_THETA_V1};
24+
2525
mod compression;
26+
pub use compression::CompressionCodec;
27+
2628
mod metadata;
29+
pub use metadata::{BlobMetadata, FileMetadata, CREATED_BY_PROPERTY};
30+
2731
#[cfg(feature = "tokio")]
2832
mod reader;
33+
#[cfg(feature = "tokio")]
34+
pub use reader::PuffinReader;
35+
2936
mod writer;
37+
pub use writer::PuffinWriter;
3038

3139
#[cfg(test)]
3240
mod test_utils;

crates/iceberg/src/puffin/reader.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,29 +23,29 @@ use crate::puffin::metadata::{BlobMetadata, FileMetadata};
2323
use crate::Result;
2424

2525
/// Puffin reader
26-
pub(crate) struct PuffinReader {
26+
pub struct PuffinReader {
2727
input_file: InputFile,
2828
file_metadata: OnceCell<FileMetadata>,
2929
}
3030

3131
impl PuffinReader {
3232
/// Returns a new Puffin reader
33-
pub(crate) fn new(input_file: InputFile) -> Self {
33+
pub fn new(input_file: InputFile) -> Self {
3434
Self {
3535
input_file,
3636
file_metadata: OnceCell::new(),
3737
}
3838
}
3939

4040
/// Returns file metadata
41-
pub(crate) async fn file_metadata(&self) -> Result<&FileMetadata> {
41+
pub async fn file_metadata(&self) -> Result<&FileMetadata> {
4242
self.file_metadata
4343
.get_or_try_init(|| FileMetadata::read(&self.input_file))
4444
.await
4545
}
4646

4747
/// Returns blob
48-
pub(crate) async fn blob(&self, blob_metadata: &BlobMetadata) -> Result<Blob> {
48+
pub async fn blob(&self, blob_metadata: &BlobMetadata) -> Result<Blob> {
4949
let file_read = self.input_file.reader().await?;
5050
let start = blob_metadata.offset;
5151
let end = start + blob_metadata.length;

crates/iceberg/src/puffin/writer.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use crate::puffin::metadata::{BlobMetadata, FileMetadata, Flag};
2626
use crate::Result;
2727

2828
/// Puffin writer
29-
pub(crate) struct PuffinWriter {
29+
pub struct PuffinWriter {
3030
writer: Box<dyn FileWrite>,
3131
is_header_written: bool,
3232
num_bytes_written: u64,
@@ -38,7 +38,7 @@ pub(crate) struct PuffinWriter {
3838

3939
impl PuffinWriter {
4040
/// Returns a new Puffin writer
41-
pub(crate) async fn new(
41+
pub async fn new(
4242
output_file: &OutputFile,
4343
properties: HashMap<String, String>,
4444
compress_footer: bool,
@@ -63,11 +63,7 @@ impl PuffinWriter {
6363
}
6464

6565
/// Adds blob to Puffin file
66-
pub(crate) async fn add(
67-
&mut self,
68-
blob: Blob,
69-
compression_codec: CompressionCodec,
70-
) -> Result<()> {
66+
pub async fn add(&mut self, blob: Blob, compression_codec: CompressionCodec) -> Result<()> {
7167
self.write_header_once().await?;
7268

7369
let offset = self.num_bytes_written;
@@ -89,7 +85,7 @@ impl PuffinWriter {
8985
}
9086

9187
/// Finalizes the Puffin file
92-
pub(crate) async fn close(mut self) -> Result<()> {
88+
pub async fn close(mut self) -> Result<()> {
9389
self.write_header_once().await?;
9490
self.write_footer().await?;
9591
self.writer.close().await?;

0 commit comments