Skip to content

Commit c8ad29c

Browse files
author
Nick Lanham
committed
address comments
1 parent 8cc6bf1 commit c8ad29c

File tree

3 files changed

+48
-13
lines changed

3 files changed

+48
-13
lines changed

derive-macros/src/lib.rs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,12 @@ use syn::{parse_macro_input, Data, DataStruct, DeriveInput, Fields, Meta, PathAr
1010
/// Change Metadata](https://github.com/delta-io/delta/blob/master/PROTOCOL.md#change-metadata)
1111
/// action (this macro allows the use of standard rust snake_case, and will convert to the correct
1212
/// delta schema camelCase version).
13-
#[proc_macro_derive(Schema, attributes(schema_container_values_null))]
13+
///
14+
/// If a field sets `drop_null_container_values`, it means the underlying data can contain null in
15+
/// the values of the container (i.e. a `key` -> `null` in a `HashMap`). Therefore the schema should
16+
/// mark the value field as nullable, but those mappings will be dropped when converting to an
17+
/// actual rust `HashMap`. Currently this can _only_ be set on `HashMap` fields.
18+
#[proc_macro_derive(Schema, attributes(drop_null_container_values))]
1419
pub fn derive_schema(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
1520
let input = parse_macro_input!(input as DeriveInput);
1621
let struct_ident = input.ident;
@@ -67,7 +72,7 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
6772
let have_schema_null = field.attrs.iter().any(|attr| {
6873
// check if we have the drop_null_container_values attr
6974
match &attr.meta {
70-
Meta::Path(path) => path.get_ident().map(|ident| ident == "schema_container_values_null").unwrap_or(false),
75+
Meta::Path(path) => path.get_ident().is_some_and(|ident| ident == "drop_null_container_values"),
7176
_ => false,
7277
}
7378
});
@@ -84,8 +89,8 @@ fn gen_schema_fields(data: &Data) -> TokenStream {
8489
});
8590
if have_schema_null {
8691
if let Some(first_ident) = type_path.path.segments.first().map(|seg| &seg.ident) {
87-
if first_ident != "HashMap" && first_ident != "Vec" {
88-
panic!("Can only use schema_container_values_null on HashMap or Vec fields, not {first_ident:?}");
92+
if first_ident != "HashMap" {
93+
panic!("Can only use drop_null_container_values on HashMap fields, not {first_ident:?}");
8994
}
9095
}
9196
quote_spanned! { field.span() => #(#type_path_quoted),* get_nullable_container_struct_field(stringify!(#name))}

kernel/src/actions/mod.rs

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,11 @@ pub struct Add {
140140
/// [RFC 2396 URI Generic Syntax]: https://www.ietf.org/rfc/rfc2396.txt
141141
pub path: String,
142142

143-
/// A map from partition column to value for this logical file.
144-
#[schema_container_values_null]
143+
/// A map from partition column to value for this logical file. This map can contain null in the
144+
/// values, meaning a partition is null. We drop those values from this map, due to the
145+
/// `drop_null_container_values` annotation. This means an engine can assume that if a partition
146+
/// is found in [`Metadata`] `partition_columns`, but not in this map, its value is null.
147+
#[drop_null_container_values]
145148
pub partition_values: HashMap<String, String>,
146149

147150
/// The size of this data file in bytes
@@ -297,6 +300,40 @@ mod tests {
297300
assert_eq!(schema, expected);
298301
}
299302

303+
#[test]
304+
fn test_add_schema() {
305+
let schema = get_log_schema()
306+
.project(&["add"])
307+
.expect("Couldn't get add field");
308+
309+
let expected = Arc::new(StructType::new(vec![StructField::new(
310+
"add",
311+
StructType::new(vec![
312+
StructField::new("path", DataType::STRING, false),
313+
StructField::new(
314+
"partitionValues",
315+
MapType::new(DataType::STRING, DataType::STRING, true),
316+
false,
317+
),
318+
StructField::new("size", DataType::LONG, false),
319+
StructField::new("modificationTime", DataType::LONG, false),
320+
StructField::new("dataChange", DataType::BOOLEAN, false),
321+
StructField::new("stats", DataType::STRING, true),
322+
StructField::new(
323+
"tags",
324+
MapType::new(DataType::STRING, DataType::STRING, false),
325+
true,
326+
),
327+
deletion_vector_field(),
328+
StructField::new("baseRowId", DataType::LONG, true),
329+
StructField::new("defaultRowCommitVersion", DataType::LONG, true),
330+
StructField::new("clusteringProvider", DataType::STRING, true),
331+
]),
332+
true,
333+
)]));
334+
assert_eq!(schema, expected);
335+
}
336+
300337
fn tags_field() -> StructField {
301338
StructField::new(
302339
"tags",

kernel/src/actions/schemas.rs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,6 @@ impl<T: ToDataType> ToDataType for Vec<T> {
4242
}
4343
}
4444

45-
// ToDataType impl for nullable array types
46-
impl<T: ToDataType> ToNullableContainerType for Vec<T> {
47-
fn to_nullable_container_type() -> DataType {
48-
ArrayType::new(T::to_data_type(), true).into()
49-
}
50-
}
51-
5245
impl<T: ToDataType> ToDataType for HashSet<T> {
5346
fn to_data_type() -> DataType {
5447
ArrayType::new(T::to_data_type(), false).into()

0 commit comments

Comments
 (0)