-
Notifications
You must be signed in to change notification settings - Fork 60
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support annotation for nullable values in a container #342
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -1,7 +1,7 @@ | ||||||||
use proc_macro2::{Ident, TokenStream}; | ||||||||
use quote::{quote, quote_spanned}; | ||||||||
use syn::spanned::Spanned; | ||||||||
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArguments, Type}; | ||||||||
use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, Meta, PathArguments, Type}; | ||||||||
|
||||||||
/// Derive a `delta_kernel::schemas::ToDataType` implementation for the annotated struct. The actual | ||||||||
/// field names in the schema (and therefore of the struct members) are all mandated by the Delta | ||||||||
|
@@ -10,7 +10,7 @@ use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, PathArgument | |||||||
/// Change Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata) | ||||||||
/// action (this macro allows the use of standard rust snake_case, and will convert to the correct | ||||||||
/// delta schema camelCase version). | ||||||||
#[proc_macro_derive(Schema)] | ||||||||
#[proc_macro_derive(Schema, attributes(schema_container_values_null))] | ||||||||
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream { | ||||||||
let input = parse_macro_input!(input as DeriveInput); | ||||||||
let struct_ident = input.ident; | ||||||||
|
@@ -20,7 +20,7 @@ pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream | |||||||
#[automatically_derived] | ||||||||
impl crate::actions::schemas::ToDataType for #struct_ident { | ||||||||
fn to_data_type() -> crate::schema::DataType { | ||||||||
use crate::actions::schemas::{ToDataType, GetStructField}; | ||||||||
use crate::actions::schemas::{ToDataType, GetStructField, GetNullableContainerStructField}; | ||||||||
crate::schema::StructType::new(vec![ | ||||||||
#schema_fields | ||||||||
]).into() | ||||||||
|
@@ -64,6 +64,14 @@ fn gen_schema_fields(data: &Data) -> TokenStream { | |||||||
let schema_fields = fields.iter().map(|field| { | ||||||||
let name = field.ident.as_ref().unwrap(); // we know these are named fields | ||||||||
let name = get_schema_name(name); | ||||||||
let have_schema_null = field.attrs.iter().any(|attr| { | ||||||||
// check if we have schema_map_values_null attr | ||||||||
match &attr.meta { | ||||||||
Meta::Path(path) => path.get_ident().map(|ident| ident == "schema_container_values_null").unwrap_or(false), | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
_ => false, | ||||||||
} | ||||||||
}); | ||||||||
|
||||||||
match field.ty { | ||||||||
Type::Path(ref type_path) => { | ||||||||
let type_path_quoted = type_path.path.segments.iter().map(|segment| { | ||||||||
|
@@ -74,7 +82,16 @@ fn gen_schema_fields(data: &Data) -> TokenStream { | |||||||
_ => panic!("Can only handle <> type path args"), | ||||||||
} | ||||||||
}); | ||||||||
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))} | ||||||||
if have_schema_null { | ||||||||
if let Some(first_ident) = type_path.path.segments.first().map(|seg| &seg.ident) { | ||||||||
if first_ident != "HashMap" && first_ident != "Vec" { | ||||||||
panic!("Can only use schema_container_values_null on HashMap or Vec fields, not {first_ident:?}"); | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would it be correct to delete null entries from a Vec? It would change the indexes of all entries after it, which could cause a problem if the indexes were somehow meaningful? (I also wonder what potential use cases that even serves?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good call. We don't actually have a use case for it yet, so I've removed it and only support |
||||||||
} | ||||||||
} | ||||||||
quote_spanned! { field.span() => #(#type_path_quoted),* get_nullable_container_struct_field(stringify!(#name))} | ||||||||
} else { | ||||||||
quote_spanned! { field.span() => #(#type_path_quoted),* get_struct_field(stringify!(#name))} | ||||||||
} | ||||||||
} | ||||||||
_ => { | ||||||||
panic!("Can't handle type: {:?}", field.ty); | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,7 @@ pub(crate) fn get_log_schema() -> &'static StructType { | |
pub struct Format { | ||
/// Name of the encoding for files in this table | ||
pub provider: String, | ||
/// A map containingconfiguration options for the format | ||
/// A map containing configuration options for the format | ||
pub options: HashMap<String, String>, | ||
} | ||
|
||
|
@@ -141,6 +141,7 @@ pub struct Add { | |
pub path: String, | ||
|
||
/// A map from partition column to value for this logical file. | ||
#[schema_container_values_null] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should consider However, the above explanation is a bit hard to parse, and doesn't immediately follow from the attribute. That makes me wonder if we need to take a different approach to avoid trouble. For example -- should the attribute be called There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Makes sense. I've renamed it to |
||
pub partition_values: HashMap<String, String>, | ||
|
||
/// The size of this data file in bytes | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we doc-comment what
schema_container_values_null
means?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, added