Skip to content

Commit

Permalink
Support annotation for nullable values in a container
Browse files Browse the repository at this point in the history
  • Loading branch information
Nick Lanham committed Sep 18, 2024
1 parent ed1919a commit aa88b56
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 5 deletions.
25 changes: 21 additions & 4 deletions derive-macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use proc_macro2::{Ident, TokenStream};
use quote::{quote, quote_spanned};
use syn::spanned::Spanned;
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArguments, Type};
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, Meta, PathArguments, Type};

/// Derive a `delta_kernel::schemas::ToDataType` implementation for the annotated struct. The actual
/// field names in the schema (and therefore of the struct members) are all mandated by the Delta
Expand All @@ -10,7 +10,7 @@ use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArgument
/// Change Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata)
/// action (this macro allows the use of standard rust snake_case, and will convert to the correct
/// delta schema camelCase version).
#[proc_macro_derive(Schema)]
#[proc_macro_derive(Schema, attributes(schema_container_values_null))]
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as DeriveInput);
let struct_ident = input.ident;
Expand All @@ -20,7 +20,7 @@ pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream
#[automatically_derived]
impl crate::actions::schemas::ToDataType for #struct_ident {
fn to_data_type() -> crate::schema::DataType {
use crate::actions::schemas::{ToDataType, GetStructField};
use crate::actions::schemas::{ToDataType, GetStructField, GetNullableContainerStructField};
crate::schema::StructType::new(vec![
#schema_fields
]).into()
Expand Down Expand Up @@ -64,6 +64,14 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
let schema_fields = fields.iter().map(|field| {
let name = field.ident.as_ref().unwrap(); // we know these are named fields
let name = get_schema_name(name);
let have_schema_null = field.attrs.iter().any(|attr| {
// check if we have schema_map_values_null attr
match &attr.meta {
Meta::Path(path) => path.get_ident().map(|ident| ident == "schema_container_values_null").unwrap_or(false),
_ => false,
}
});

match field.ty {
Type::Path(ref type_path) => {
let type_path_quoted = type_path.path.segments.iter().map(|segment| {
Expand All @@ -74,7 +82,16 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
_ => panic!("Can only handle <> type path args"),
}
});
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))}
if have_schema_null {
if let Some(first_ident) = type_path.path.segments.first().map(|seg| &seg.ident) {
if first_ident != "HashMap" && first_ident != "Vec" {
panic!("Can only use schema_container_values_null on HashMap or Vec fields, not {first_ident:?}");
}
}
quote_spanned! { field.span() => #(#type_path_quoted),* get_nullable_container_struct_field(stringify!(#name))}
} else {
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))}
}
}
_ => {
panic!("Can't handle type: {:?}", field.ty);
Expand Down
3 changes: 2 additions & 1 deletion kernel/src/actions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub(crate) fn get_log_schema() -> &'static StructType {
pub struct Format {
/// Name of the encoding for files in this table
pub provider: String,
/// A map containingconfiguration options for the format
/// A map containing configuration options for the format
pub options: HashMap<String, String>,
}

Expand Down Expand Up @@ -141,6 +141,7 @@ pub struct Add {
pub path: String,

/// A map from partition column to value for this logical file.
#[schema_container_values_null]
pub partition_values: HashMap<String, String>,

/// The size of this data file in bytes
Expand Down
29 changes: 29 additions & 0 deletions kernel/src/actions/schemas.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ pub(crate) trait ToDataType {
fn to_data_type() -> DataType;
}

pub(crate) trait ToNullableContainerType {
fn to_nullable_container_type() -> DataType;
}

macro_rules! impl_to_data_type {
( $(($rust_type: ty, $kernel_type: expr)), * ) => {
$(
Expand Down Expand Up @@ -38,6 +42,13 @@ impl<T: ToDataType> ToDataType for Vec<T> {
}
}

// ToDataType impl for nullable map types
impl<T: ToDataType> ToNullableContainerType for Vec<T> {
fn to_nullable_container_type() -> DataType {
ArrayType::new(T::to_data_type(), true).into()
}
}

impl<T: ToDataType> ToDataType for HashSet<T> {
fn to_data_type() -> DataType {
ArrayType::new(T::to_data_type(), false).into()
Expand All @@ -51,10 +62,28 @@ impl<K: ToDataType, V: ToDataType> ToDataType for HashMap<K, V> {
}
}

// ToDataType impl for nullable map types
impl<K: ToDataType, V: ToDataType> ToNullableContainerType for HashMap<K, V> {
fn to_nullable_container_type() -> DataType {
MapType::new(K::to_data_type(), V::to_data_type(), true).into()
}
}

pub(crate) trait GetStructField {
fn get_struct_field(name: impl Into<String>) -> StructField;
}

pub(crate) trait GetNullableContainerStructField {
fn get_nullable_container_struct_field(name: impl Into<String>) -> StructField;
}

// Normal types produce non-nullable fields
impl<T: ToNullableContainerType> GetNullableContainerStructField for T {
fn get_nullable_container_struct_field(name: impl Into<String>) -> StructField {
StructField::new(name, T::to_nullable_container_type(), false)
}
}

// Normal types produce non-nullable fields
impl<T: ToDataType> GetStructField for T {
fn get_struct_field(name: impl Into<String>) -> StructField {
Expand Down

0 comments on commit aa88b56

Please sign in to comment.