Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick Lanham committed Sep 18, 2024
1 parent 8cc6bf1 commit c8ad29c
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 13 deletions.
13 changes: 9 additions & 4 deletions derive-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, Meta, PathAr
/// Change Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata)
/// action (this macro allows the use of standard rust snake_case, and will convert to the correct
/// delta schema camelCase version).
#[proc_macro_derive(Schema, attributes(schema_container_values_null))]
///
/// If a field sets `drop_null_container_values`, it means the underlying data can contain null in
/// the values of the container (i.e. a `key` -> `null` in a `HashMap`). Therefore the schema should
/// mark the value field as nullable, but those mappings will be dropped when converting to an
/// actual rust `HashMap`. Currently this can _only_ be set on `HashMap` fields.
#[proc_macro_derive(Schema, attributes(drop_null_container_values))]
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as DeriveInput);
let struct_ident = input.ident;
Expand Down Expand Up @@ -67,7 +72,7 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
let have_schema_null = field.attrs.iter().any(|attr| {
// check if we have schema_map_values_null attr
match &attr.meta {
Meta::Path(path) => path.get_ident().map(|ident| ident == "schema_container_values_null").unwrap_or(false),
Meta::Path(path) => path.get_ident().is_some_and(|ident| ident == "drop_null_container_values"),
_ => false,
}
});
Expand All @@ -84,8 +89,8 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
});
if have_schema_null {
if let Some(first_ident) = type_path.path.segments.first().map(|seg| &seg.ident) {
if first_ident != "HashMap" && first_ident != "Vec" {
panic!("Can only use schema_container_values_null on HashMap or Vec fields, not {first_ident:?}");
if first_ident != "HashMap" {
panic!("Can only use drop_null_container_values on HashMap fields, not {first_ident:?}");
}
}
quote_spanned! { field.span() => #(#type_path_quoted),* get_nullable_container_struct_field(stringify!(#name))}
Expand Down
41 changes: 39 additions & 2 deletions kernel/src/actions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,11 @@ pub struct Add {
/// [RFC 2396 URI Generic Syntax]: https://www.ietf.org/rfc/rfc2396.txt
pub path: String,

/// A map from partition column to value for this logical file.
#[schema_container_values_null]
/// A map from partition column to value for this logical file. This map can contain null in the
/// values meaning a partition is null. We drop those values from this map, due to the
/// `drop_null_container_values` annotation. This means an engine can assume that if a partition
/// is found in [`Metadata`] `partition_columns`, but not in this map, it's value is null.
#[drop_null_container_values]
pub partition_values: HashMap<String, String>,

/// The size of this data file in bytes
Expand Down Expand Up @@ -297,6 +300,40 @@ mod tests {
assert_eq!(schema, expected);
}

#[test]
fn test_add_schema() {
let schema = get_log_schema()
.project(&["add"])
.expect("Couldn't get add field");

let expected = Arc::new(StructType::new(vec![StructField::new(
"add",
StructType::new(vec![
StructField::new("path", DataType::STRING, false),
StructField::new(
"partitionValues",
MapType::new(DataType::STRING, DataType::STRING, true),
false,
),
StructField::new("size", DataType::LONG, false),
StructField::new("modificationTime", DataType::LONG, false),
StructField::new("dataChange", DataType::BOOLEAN, false),
StructField::new("stats", DataType::STRING, true),
StructField::new(
"tags",
MapType::new(DataType::STRING, DataType::STRING, false),
true,
),
deletion_vector_field(),
StructField::new("baseRowId", DataType::LONG, true),
StructField::new("defaultRowCommitVersion", DataType::LONG, true),
StructField::new("clusteringProvider", DataType::STRING, true),
]),
true,
)]));
assert_eq!(schema, expected);
}

fn tags_field() -> StructField {
StructField::new(
"tags",
Expand Down
7 changes: 0 additions & 7 deletions kernel/src/actions/schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,6 @@ impl<T: ToDataType> ToDataType for Vec<T> {
}
}

// ToDataType impl for nullable array types
impl<T: ToDataType> ToNullableContainerType for Vec<T> {
fn to_nullable_container_type() -> DataType {
ArrayType::new(T::to_data_type(), true).into()
}
}

impl<T: ToDataType> ToDataType for HashSet<T> {
fn to_data_type() -> DataType {
ArrayType::new(T::to_data_type(), false).into()
Expand Down

0 comments on commit c8ad29c

Please sign in to comment.