Skip to content

Commit

Permalink
Merge branch 'main' into expressions_visitor
Browse files: browse the repository at this point in the history
  • Loading branch information
OussamaSaoudi-db authored Sep 27, 2024
2 parents 19a3fa0 + da206ed commit a722343
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 44 deletions.
42 changes: 20 additions & 22 deletions kernel/src/engine/arrow_conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,22 +177,22 @@ impl TryFrom<&ArrowDataType> for DataType {

fn try_from(arrow_datatype: &ArrowDataType) -> Result<Self, ArrowError> {
match arrow_datatype {
ArrowDataType::Utf8 => Ok(DataType::Primitive(PrimitiveType::String)),
ArrowDataType::LargeUtf8 => Ok(DataType::Primitive(PrimitiveType::String)),
ArrowDataType::Int64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type
ArrowDataType::Int32 => Ok(DataType::Primitive(PrimitiveType::Integer)),
ArrowDataType::Int16 => Ok(DataType::Primitive(PrimitiveType::Short)),
ArrowDataType::Int8 => Ok(DataType::Primitive(PrimitiveType::Byte)),
ArrowDataType::UInt64 => Ok(DataType::Primitive(PrimitiveType::Long)), // undocumented type
ArrowDataType::UInt32 => Ok(DataType::Primitive(PrimitiveType::Integer)),
ArrowDataType::UInt16 => Ok(DataType::Primitive(PrimitiveType::Short)),
ArrowDataType::UInt8 => Ok(DataType::Primitive(PrimitiveType::Byte)),
ArrowDataType::Float32 => Ok(DataType::Primitive(PrimitiveType::Float)),
ArrowDataType::Float64 => Ok(DataType::Primitive(PrimitiveType::Double)),
ArrowDataType::Boolean => Ok(DataType::Primitive(PrimitiveType::Boolean)),
ArrowDataType::Binary => Ok(DataType::Primitive(PrimitiveType::Binary)),
ArrowDataType::FixedSizeBinary(_) => Ok(DataType::Primitive(PrimitiveType::Binary)),
ArrowDataType::LargeBinary => Ok(DataType::Primitive(PrimitiveType::Binary)),
ArrowDataType::Utf8 => Ok(DataType::STRING),
ArrowDataType::LargeUtf8 => Ok(DataType::STRING),
ArrowDataType::Int64 => Ok(DataType::LONG), // undocumented type
ArrowDataType::Int32 => Ok(DataType::INTEGER),
ArrowDataType::Int16 => Ok(DataType::SHORT),
ArrowDataType::Int8 => Ok(DataType::BYTE),
ArrowDataType::UInt64 => Ok(DataType::LONG), // undocumented type
ArrowDataType::UInt32 => Ok(DataType::INTEGER),
ArrowDataType::UInt16 => Ok(DataType::SHORT),
ArrowDataType::UInt8 => Ok(DataType::BYTE),
ArrowDataType::Float32 => Ok(DataType::FLOAT),
ArrowDataType::Float64 => Ok(DataType::DOUBLE),
ArrowDataType::Boolean => Ok(DataType::BOOLEAN),
ArrowDataType::Binary => Ok(DataType::BINARY),
ArrowDataType::FixedSizeBinary(_) => Ok(DataType::BINARY),
ArrowDataType::LargeBinary => Ok(DataType::BINARY),
ArrowDataType::Decimal128(p, s) => {
if *s < 0 {
return Err(ArrowError::from_external_error(
Expand All @@ -202,15 +202,13 @@ impl TryFrom<&ArrowDataType> for DataType {
DataType::decimal(*p, *s as u8)
.map_err(|e| ArrowError::from_external_error(e.into()))
}
ArrowDataType::Date32 => Ok(DataType::Primitive(PrimitiveType::Date)),
ArrowDataType::Date64 => Ok(DataType::Primitive(PrimitiveType::Date)),
ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => {
Ok(DataType::Primitive(PrimitiveType::TimestampNtz))
}
ArrowDataType::Date32 => Ok(DataType::DATE),
ArrowDataType::Date64 => Ok(DataType::DATE),
ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => Ok(DataType::TIMESTAMP_NTZ),
ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz))
if tz.eq_ignore_ascii_case("utc") =>
{
Ok(DataType::Primitive(PrimitiveType::Timestamp))
Ok(DataType::TIMESTAMP)
}
ArrowDataType::Struct(fields) => {
let converted_fields: Result<Vec<StructField>, _> = fields
Expand Down
10 changes: 5 additions & 5 deletions kernel/src/engine/arrow_data.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::engine_data::{EngineData, EngineList, EngineMap, GetData};
use crate::schema::{DataType, PrimitiveType, Schema, SchemaRef, StructField};
use crate::schema::{DataType, Schema, SchemaRef, StructField};
use crate::utils::require;
use crate::{DataVisitor, DeltaResult, Error};

Expand Down Expand Up @@ -222,19 +222,19 @@ impl ArrowEngineData {
Some(struct_array),
)?;
}
(&ArrowDataType::Boolean, &DataType::Primitive(PrimitiveType::Boolean)) => {
(&ArrowDataType::Boolean, &DataType::BOOLEAN) => {
debug!("Pushing boolean array for {}", field.name);
out_col_array.push(col.as_boolean());
}
(&ArrowDataType::Utf8, &DataType::Primitive(PrimitiveType::String)) => {
(&ArrowDataType::Utf8, &DataType::STRING) => {
debug!("Pushing string array for {}", field.name);
out_col_array.push(col.as_string());
}
(&ArrowDataType::Int32, &DataType::Primitive(PrimitiveType::Integer)) => {
(&ArrowDataType::Int32, &DataType::INTEGER) => {
debug!("Pushing int32 array for {}", field.name);
out_col_array.push(col.as_primitive::<Int32Type>());
}
(&ArrowDataType::Int64, &DataType::Primitive(PrimitiveType::Long)) => {
(&ArrowDataType::Int64, &DataType::LONG) => {
debug!("Pushing int64 array for {}", field.name);
out_col_array.push(col.as_primitive::<Int64Type>());
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/engine/arrow_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ mod tests {
BinaryOperator::NotIn,
Expression::literal(5),
Expression::literal(Scalar::Array(ArrayData::new(
ArrayType::new(DeltaDataTypes::Primitive(PrimitiveType::Integer), false),
ArrayType::new(DeltaDataTypes::INTEGER, false),
vec![Scalar::Integer(1), Scalar::Integer(2)],
))),
);
Expand Down
6 changes: 3 additions & 3 deletions kernel/src/engine/arrow_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ pub(crate) fn ensure_data_types(
(DataType::Primitive(_), _) if arrow_type.is_primitive() => {
check_cast_compat(kernel_type.try_into()?, arrow_type)
}
(DataType::Primitive(PrimitiveType::Boolean), ArrowDataType::Boolean)
| (DataType::Primitive(PrimitiveType::String), ArrowDataType::Utf8)
| (DataType::Primitive(PrimitiveType::Binary), ArrowDataType::Binary) => {
(&DataType::BOOLEAN, ArrowDataType::Boolean)
| (&DataType::STRING, ArrowDataType::Utf8)
| (&DataType::BINARY, ArrowDataType::Binary) => {
// strings, bools, and binary aren't primitive in arrow
Ok(DataTypeCompat::Identical)
}
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/expressions/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ mod tests {
fn test_arrays() {
#[allow(deprecated)]
let array = Scalar::Array(ArrayData {
tpe: ArrayType::new(DataType::Primitive(PrimitiveType::Integer), false),
tpe: ArrayType::new(DataType::INTEGER, false),
elements: vec![Scalar::Integer(1), Scalar::Integer(2), Scalar::Integer(3)],
});

Expand Down
10 changes: 2 additions & 8 deletions kernel/src/scan/data_skipping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::actions::visitors::SelectionVectorVisitor;
use crate::actions::{get_log_schema, ADD_NAME};
use crate::error::DeltaResult;
use crate::expressions::{BinaryOperator, Expression as Expr, UnaryOperator, VariadicOperator};
use crate::schema::{DataType, PrimitiveType, SchemaRef, StructField, StructType};
use crate::schema::{DataType, SchemaRef, StructField, StructType};
use crate::{Engine, EngineData, ExpressionEvaluator, JsonHandler};

/// Get the expression that checks if a col could be null, assuming tight_bounds = true. In this
Expand Down Expand Up @@ -216,13 +216,7 @@ impl DataSkippingFilter {
StructType::new(
data_fields
.iter()
.map(|data_field| {
StructField::new(
&data_field.name,
DataType::Primitive(PrimitiveType::Long),
true,
)
})
.map(|data_field| StructField::new(&data_field.name, DataType::LONG, true))
.collect(),
),
true,
Expand Down
5 changes: 1 addition & 4 deletions kernel/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,10 +548,7 @@ mod tests {
}
"#;
let field: StructField = serde_json::from_str(data).unwrap();
assert!(matches!(
field.data_type,
DataType::Primitive(PrimitiveType::Integer)
));
assert!(matches!(field.data_type, DataType::INTEGER));

let data = r#"
{
Expand Down

0 comments on commit a722343

Please sign in to comment.