diff --git a/kernel/src/engine/arrow_conversion.rs b/kernel/src/engine/arrow_conversion.rs index 0623218dd..9e59bf5ae 100644 --- a/kernel/src/engine/arrow_conversion.rs +++ b/kernel/src/engine/arrow_conversion.rs @@ -177,22 +177,22 @@ impl TryFrom<&ArrowDataType> for DataType { fn try_from(arrow_datatype: &ArrowDataType) -> Result { match arrow_datatype { - ArrowDataType::Utf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::LargeUtf8 => Ok(DataType::Primitive(PrimitiveType::String)), - ArrowDataType::Int64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::Int32 => Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::Int16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::Int8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::UInt64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type - ArrowDataType::UInt32 => Ok(DataType::Primitive(PrimitiveType::Integer)), - ArrowDataType::UInt16 => Ok(DataType::Primitive(PrimitiveType::Short)), - ArrowDataType::UInt8 => Ok(DataType::Primitive(PrimitiveType::Byte)), - ArrowDataType::Float32 => Ok(DataType::Primitive(PrimitiveType::Float)), - ArrowDataType::Float64 => Ok(DataType::Primitive(PrimitiveType::Double)), - ArrowDataType::Boolean => Ok(DataType::Primitive(PrimitiveType::Boolean)), - ArrowDataType::Binary => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::FixedSizeBinary(_) => Ok(DataType::Primitive(PrimitiveType::Binary)), - ArrowDataType::LargeBinary => Ok(DataType::Primitive(PrimitiveType::Binary)), + ArrowDataType::Utf8 => Ok(DataType::STRING), + ArrowDataType::LargeUtf8 => Ok(DataType::STRING), + ArrowDataType::Int64 => Ok(DataType::LONG), // undocumented type + ArrowDataType::Int32 => Ok(DataType::INTEGER), + ArrowDataType::Int16 => Ok(DataType::SHORT), + ArrowDataType::Int8 => Ok(DataType::BYTE), + ArrowDataType::UInt64 => Ok(DataType::LONG), // undocumented type + ArrowDataType::UInt32 => Ok(DataType::INTEGER), + ArrowDataType::UInt16 => Ok(DataType::SHORT), + ArrowDataType::UInt8 => Ok(DataType::BYTE), + ArrowDataType::Float32 => Ok(DataType::FLOAT), + ArrowDataType::Float64 => Ok(DataType::DOUBLE), + ArrowDataType::Boolean => Ok(DataType::BOOLEAN), + ArrowDataType::Binary => Ok(DataType::BINARY), + ArrowDataType::FixedSizeBinary(_) => Ok(DataType::BINARY), + ArrowDataType::LargeBinary => Ok(DataType::BINARY), ArrowDataType::Decimal128(p, s) => { if *s < 0 { return Err(ArrowError::from_external_error( @@ -202,15 +202,13 @@ impl TryFrom<&ArrowDataType> for DataType { DataType::decimal(*p, *s as u8) .map_err(|e| ArrowError::from_external_error(e.into())) } - ArrowDataType::Date32 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Date64 => Ok(DataType::Primitive(PrimitiveType::Date)), - ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => { - Ok(DataType::Primitive(PrimitiveType::TimestampNtz)) - } + ArrowDataType::Date32 => Ok(DataType::DATE), + ArrowDataType::Date64 => Ok(DataType::DATE), + ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => Ok(DataType::TIMESTAMP_NTZ), ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.eq_ignore_ascii_case("utc") => { - Ok(DataType::Primitive(PrimitiveType::Timestamp)) + Ok(DataType::TIMESTAMP) } ArrowDataType::Struct(fields) => { let converted_fields: Result, _> = fields diff --git a/kernel/src/engine/arrow_data.rs b/kernel/src/engine/arrow_data.rs index 70c9a1c67..78fe49a9a 100644 --- a/kernel/src/engine/arrow_data.rs +++ b/kernel/src/engine/arrow_data.rs @@ -1,5 +1,5 @@ use crate::engine_data::{EngineData, EngineList, EngineMap, GetData}; -use crate::schema::{DataType, PrimitiveType, Schema, SchemaRef, StructField}; +use crate::schema::{DataType, Schema, SchemaRef, StructField}; use crate::utils::require; use crate::{DataVisitor, DeltaResult, Error}; @@ -222,19 +222,19 @@ impl ArrowEngineData { Some(struct_array), )?; } - (&ArrowDataType::Boolean, &DataType::Primitive(PrimitiveType::Boolean)) => { + (&ArrowDataType::Boolean, &DataType::BOOLEAN) => { debug!("Pushing boolean array for {}", field.name); out_col_array.push(col.as_boolean()); } - (&ArrowDataType::Utf8, &DataType::Primitive(PrimitiveType::String)) => { + (&ArrowDataType::Utf8, &DataType::STRING) => { debug!("Pushing string array for {}", field.name); out_col_array.push(col.as_string()); } - (&ArrowDataType::Int32, &DataType::Primitive(PrimitiveType::Integer)) => { + (&ArrowDataType::Int32, &DataType::INTEGER) => { debug!("Pushing int32 array for {}", field.name); out_col_array.push(col.as_primitive::()); } - (&ArrowDataType::Int64, &DataType::Primitive(PrimitiveType::Long)) => { + (&ArrowDataType::Int64, &DataType::LONG) => { debug!("Pushing int64 array for {}", field.name); out_col_array.push(col.as_primitive::()); } diff --git a/kernel/src/engine/arrow_expression.rs b/kernel/src/engine/arrow_expression.rs index 33bf4c47d..61831917e 100644 --- a/kernel/src/engine/arrow_expression.rs +++ b/kernel/src/engine/arrow_expression.rs @@ -500,7 +500,7 @@ mod tests { BinaryOperator::NotIn, Expression::literal(5), Expression::literal(Scalar::Array(ArrayData::new( - ArrayType::new(DeltaDataTypes::Primitive(PrimitiveType::Integer), false), + ArrayType::new(DeltaDataTypes::INTEGER, false), vec![Scalar::Integer(1), Scalar::Integer(2)], ))), ); diff --git a/kernel/src/engine/arrow_utils.rs b/kernel/src/engine/arrow_utils.rs index 6d1a66d23..843396c9d 100644 --- a/kernel/src/engine/arrow_utils.rs +++ b/kernel/src/engine/arrow_utils.rs @@ -135,9 +135,9 @@ pub(crate) fn ensure_data_types( (DataType::Primitive(_), _) if arrow_type.is_primitive() => { check_cast_compat(kernel_type.try_into()?, arrow_type) } - (DataType::Primitive(PrimitiveType::Boolean), ArrowDataType::Boolean) - | (DataType::Primitive(PrimitiveType::String), ArrowDataType::Utf8) - | (DataType::Primitive(PrimitiveType::Binary), ArrowDataType::Binary) => { + (&DataType::BOOLEAN, ArrowDataType::Boolean) + | (&DataType::STRING, ArrowDataType::Utf8) + | (&DataType::BINARY, ArrowDataType::Binary) => { // strings, bools, and binary aren't primitive in arrow Ok(DataTypeCompat::Identical) } diff --git a/kernel/src/expressions/scalars.rs b/kernel/src/expressions/scalars.rs index 3fa4b1800..2cf50fe53 100644 --- a/kernel/src/expressions/scalars.rs +++ b/kernel/src/expressions/scalars.rs @@ -506,7 +506,7 @@ mod tests { fn test_arrays() { #[allow(deprecated)] let array = Scalar::Array(ArrayData { - tpe: ArrayType::new(DataType::Primitive(PrimitiveType::Integer), false), + tpe: ArrayType::new(DataType::INTEGER, false), elements: vec![Scalar::Integer(1), Scalar::Integer(2), Scalar::Integer(3)], }); diff --git a/kernel/src/scan/data_skipping.rs b/kernel/src/scan/data_skipping.rs index 582efd2dc..9ff675743 100644 --- a/kernel/src/scan/data_skipping.rs +++ b/kernel/src/scan/data_skipping.rs @@ -8,7 +8,7 @@ use crate::actions::visitors::SelectionVectorVisitor; use crate::actions::{get_log_schema, ADD_NAME}; use crate::error::DeltaResult; use crate::expressions::{BinaryOperator, Expression as Expr, UnaryOperator, VariadicOperator}; -use crate::schema::{DataType, PrimitiveType, SchemaRef, StructField, StructType}; +use crate::schema::{DataType, SchemaRef, StructField, StructType}; use crate::{Engine, EngineData, ExpressionEvaluator, JsonHandler}; /// Get the expression that checks if a col could be null, assuming tight_bounds = true. In this @@ -216,13 +216,7 @@ impl DataSkippingFilter { StructType::new( data_fields .iter() - .map(|data_field| { - StructField::new( - &data_field.name, - DataType::Primitive(PrimitiveType::Long), - true, - ) - }) + .map(|data_field| StructField::new(&data_field.name, DataType::LONG, true)) .collect(), ), true, diff --git a/kernel/src/schema.rs b/kernel/src/schema.rs index 10c40ed94..98fd5b8ec 100644 --- a/kernel/src/schema.rs +++ b/kernel/src/schema.rs @@ -548,10 +548,7 @@ mod tests { } "#; let field: StructField = serde_json::from_str(data).unwrap(); - assert!(matches!( - field.data_type, - DataType::Primitive(PrimitiveType::Integer) - )); + assert!(matches!(field.data_type, DataType::INTEGER)); let data = r#" {