Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 104 additions & 24 deletions arrow-schema/src/datatype_display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ impl fmt::Display for DataType {
}
}

/// Renders one field as `<name>: [nullable ]<data type><metadata>`.
///
/// The name is written through its `Debug` representation, the literal
/// `nullable ` prefix is emitted only when the field reports itself as
/// nullable, and the trailing text is whatever `format_metadata` returns
/// for the field's metadata.
fn format_field(field: &crate::Field) -> String {
    let nullability = match field.is_nullable() {
        true => "nullable ",
        false => "",
    };
    format!(
        "{:?}: {}{}{}",
        field.name(),
        nullability,
        field.data_type(),
        format_metadata(field.metadata()),
    )
}

// Many of these formats can still be improved considerably.
// _Some_ of these can be parsed with `FromStr`, but not all (YET!).
// The goal is that the formatting should always be
Expand Down Expand Up @@ -122,13 +130,7 @@ impl fmt::Display for DataType {
if !fields.is_empty() {
let fields_str = fields
.iter()
.map(|field| {
let name = field.name();
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
let data_type = field.data_type();
let metadata_str = format_metadata(field.metadata());
format!("{name:?}: {maybe_nullable}{data_type}{metadata_str}")
})
.map(|field| format_field(field))
.collect::<Vec<_>>()
.join(", ");
write!(f, "{fields_str}")?;
Expand All @@ -143,11 +145,8 @@ impl fmt::Display for DataType {
.iter()
.map(|v| {
let type_id = v.0;
let field = v.1;
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
let data_type = field.data_type();
let metadata_str = format_metadata(field.metadata());
format!("{type_id:?}: {maybe_nullable}{data_type}{metadata_str}")
let field_str = format_field(v.1);
format!("{type_id:?}: ({field_str})")
})
.collect::<Vec<_>>()
.join(", ");
Expand All @@ -165,20 +164,19 @@ impl fmt::Display for DataType {
Self::Decimal256(precision, scale) => write!(f, "Decimal256({precision}, {scale})"),
Self::Map(field, sorted) => {
write!(f, "Map(")?;
let name = field.name();
let maybe_nullable = if field.is_nullable() { "nullable " } else { "" };
let data_type = field.data_type();
let metadata_str = format_metadata(field.metadata());
let map_field_str = format_field(field);
let keys_are_sorted = if *sorted { "sorted" } else { "unsorted" };

write!(
f,
"\"{name}\": {maybe_nullable}{data_type}{metadata_str}, {keys_are_sorted})"
)?;
write!(f, "{map_field_str}, {keys_are_sorted})")?;
Ok(())
}
Self::RunEndEncoded(run_ends_field, values_field) => {
write!(f, "RunEndEncoded({run_ends_field}, {values_field})")
write!(f, "RunEndEncoded(")?;
let run_ends_str = format_field(run_ends_field);
let values_str = format_field(values_field);

write!(f, "{run_ends_str}, {values_str})")?;
Ok(())
}
}
}
Expand Down Expand Up @@ -391,7 +389,7 @@ mod tests {

let union_data_type = DataType::Union(union_fields, crate::UnionMode::Sparse);
let union_data_type_string = union_data_type.to_string();
let expected_string = "Union(Sparse, 0: Int32, 1: nullable Utf8)";
let expected_string = "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8))";
assert_eq!(union_data_type_string, expected_string);

// Test with metadata
Expand All @@ -407,8 +405,7 @@ mod tests {
let union_data_type_with_metadata =
DataType::Union(union_fields_with_metadata, crate::UnionMode::Sparse);
let union_data_type_with_metadata_string = union_data_type_with_metadata.to_string();
let expected_string_with_metadata =
"Union(Sparse, 0: Int32, 1: nullable Utf8, metadata: {\"key\": \"value\"})";
let expected_string_with_metadata = "Union(Sparse, 0: (\"a\": Int32), 1: (\"b\": nullable Utf8, metadata: {\"key\": \"value\"}))";
assert_eq!(
union_data_type_with_metadata_string,
expected_string_with_metadata
Expand Down Expand Up @@ -456,4 +453,87 @@ mod tests {
expected_string_with_metadata
);
}

#[test]
fn test_display_run_end_encoded() {
    // Plain case: a non-nullable run-ends field and a nullable values field.
    let values = Arc::new(Field::new("values", DataType::Int32, true));
    let plain_run_ends = Arc::new(Field::new("run_ends", DataType::UInt32, false));
    let plain = DataType::RunEndEncoded(plain_run_ends, Arc::clone(&values));
    assert_eq!(
        plain.to_string(),
        "RunEndEncoded(\"run_ends\": UInt32, \"values\": nullable Int32)"
    );

    // Metadata set on the run-ends field must show up right after its data type.
    let mut annotated_run_ends = Field::new("run_ends", DataType::UInt32, false);
    annotated_run_ends.set_metadata(HashMap::from([("key".to_string(), "value".to_string())]));
    let annotated = DataType::RunEndEncoded(Arc::new(annotated_run_ends), values);
    assert_eq!(
        annotated.to_string(),
        "RunEndEncoded(\"run_ends\": UInt32, metadata: {\"key\": \"value\"}, \"values\": nullable Int32)"
    );
}

#[test]
fn test_display_dictionary() {
    // Primitive key and value types.
    let simple = DataType::Dictionary(Box::new(DataType::Int8), Box::new(DataType::Utf8));
    assert_eq!(simple.to_string(), "Dictionary(Int8, Utf8)");

    // A struct used as the value type is rendered inline, field by field.
    let struct_fields = vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Utf8, true),
    ];
    let value_type = DataType::Struct(struct_fields.into());
    let nested = DataType::Dictionary(Box::new(DataType::Int16), Box::new(value_type));
    assert_eq!(
        nested.to_string(),
        "Dictionary(Int16, Struct(\"a\": Int32, \"b\": nullable Utf8))"
    );
}

#[test]
fn test_display_interval() {
    // Each interval unit paired with the exact text its data type must display as.
    let cases = [
        (crate::IntervalUnit::YearMonth, "Interval(YearMonth)"),
        (crate::IntervalUnit::DayTime, "Interval(DayTime)"),
        (crate::IntervalUnit::MonthDayNano, "Interval(MonthDayNano)"),
    ];

    for (unit, expected) in cases {
        let rendered = DataType::Interval(unit).to_string();
        assert_eq!(rendered, expected);
    }
}

#[test]
fn test_display_timestamp() {
    // Without a time zone only the unit is shown.
    let naive = DataType::Timestamp(crate::TimeUnit::Microsecond, None);
    assert_eq!(naive.to_string(), "Timestamp(µs)");

    // With a time zone, the zone follows the unit as a quoted string.
    let zoned = DataType::Timestamp(crate::TimeUnit::Nanosecond, Some(Arc::from("UTC")));
    assert_eq!(zoned.to_string(), "Timestamp(ns, \"UTC\")");
}
}
Loading