Skip to content

Commit e9f2b41

Browse files
committed
coalesce
1 parent 25fa27f commit e9f2b41

File tree

4 files changed

+51
-69
lines changed

4 files changed

+51
-69
lines changed

kernel/src/engine/arrow_expression/evaluate_expression.rs

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,13 +1185,10 @@ mod tests {
11851185
// Create coalesce expression with column that has no nulls, followed by
11861186
// a reference to a non-existent column. If short-circuit works, the
11871187
// non-existent column is never evaluated and no error occurs.
1188-
let expr = Expression::variadic(
1189-
VariadicExpressionOp::Coalesce,
1190-
vec![
1191-
Expression::column(["a"]),
1192-
Expression::column(["nonexistent"]), // Would fail if evaluated
1193-
],
1194-
);
1188+
let expr = Expression::coalesce([
1189+
Expression::column(["a"]),
1190+
Expression::column(["nonexistent"]), // Would fail if evaluated
1191+
]);
11951192

11961193
// Should return column "a" directly (short-circuit skips evaluating "nonexistent")
11971194
let result = evaluate_expression(&expr, &batch, Some(&DataType::INTEGER)).unwrap();
@@ -1216,14 +1213,11 @@ mod tests {
12161213

12171214
// Create coalesce expression: a has nulls, b has none, c doesn't exist.
12181215
// Short-circuit should stop after evaluating b.
1219-
let expr = Expression::variadic(
1220-
VariadicExpressionOp::Coalesce,
1221-
vec![
1222-
Expression::column(["a"]),
1223-
Expression::column(["b"]),
1224-
Expression::column(["nonexistent"]), // Would fail if evaluated
1225-
],
1226-
);
1216+
let expr = Expression::coalesce([
1217+
Expression::column(["a"]),
1218+
Expression::column(["b"]),
1219+
Expression::column(["nonexistent"]), // Would fail if evaluated
1220+
]);
12271221

12281222
// Should coalesce a and b, never evaluate "nonexistent"
12291223
let result = evaluate_expression(&expr, &batch, Some(&DataType::INTEGER)).unwrap();
@@ -1242,10 +1236,7 @@ mod tests {
12421236
let a_values = Int32Array::from(vec![1, 2, 3]); // No nulls - would short-circuit
12431237
let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a_values)]).unwrap();
12441238

1245-
let expr = Expression::variadic(
1246-
VariadicExpressionOp::Coalesce,
1247-
vec![Expression::column(["a"])],
1248-
);
1239+
let expr = Expression::coalesce([Expression::column(["a"])]);
12491240

12501241
// Request STRING type but array is INT32 - should fail even with short-circuit
12511242
let result = evaluate_expression(&expr, &batch, Some(&DataType::STRING));

kernel/src/expressions/mod.rs

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,14 @@ impl Expression {
701701
Self::Variadic(VariadicExpression::new(op, exprs))
702702
}
703703

704+
/// Creates a new COALESCE expression that returns the first non-null value.
705+
///
706+
/// COALESCE evaluates expressions in order and returns the first non-null result.
707+
/// If all expressions evaluate to null, the result is null.
708+
pub fn coalesce(exprs: impl IntoIterator<Item = impl Into<Expression>>) -> Self {
709+
Self::variadic(VariadicExpressionOp::Coalesce, exprs)
710+
}
711+
704712
/// Creates a new opaque expression
705713
pub fn opaque(
706714
op: impl OpaqueExpressionOp,
@@ -1158,7 +1166,7 @@ mod tests {
11581166
use crate::expressions::scalars::{ArrayData, DecimalData, MapData, StructData};
11591167
use crate::expressions::{
11601168
column_expr, column_name, BinaryExpressionOp, BinaryPredicateOp, ColumnName,
1161-
Expression, Predicate, Scalar, Transform, UnaryExpressionOp, VariadicExpressionOp,
1169+
Expression, Predicate, Scalar, Transform, UnaryExpressionOp,
11621170
};
11631171
use crate::schema::{ArrayType, DataType, DecimalType, MapType, StructField};
11641172
use crate::utils::test_utils::assert_result_error_with_message;
@@ -1297,14 +1305,11 @@ mod tests {
12971305

12981306
#[test]
12991307
fn test_variadic_expression_roundtrip() {
1300-
let expr = Expression::variadic(
1301-
VariadicExpressionOp::Coalesce,
1302-
[
1303-
column_expr!("a"),
1304-
column_expr!("b"),
1305-
Expression::literal("default"),
1306-
],
1307-
);
1308+
let expr = Expression::coalesce([
1309+
column_expr!("a"),
1310+
column_expr!("b"),
1311+
Expression::literal("default"),
1312+
]);
13081313
assert_roundtrip(&expr);
13091314
}
13101315

@@ -1505,10 +1510,7 @@ mod tests {
15051510
column_expr!("c"),
15061511
column_expr!("d"),
15071512
);
1508-
let coalesce = Expression::variadic(
1509-
VariadicExpressionOp::Coalesce,
1510-
[add, mul, Expression::literal(0)],
1511-
);
1513+
let coalesce = Expression::coalesce([add, mul, Expression::literal(0)]);
15121514
let pred = Predicate::gt(coalesce, Expression::literal(100));
15131515
assert_roundtrip(&pred);
15141516

kernel/src/scan/log_replay.rs

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ mod tests {
560560

561561
use crate::actions::get_commit_schema;
562562
use crate::engine::sync::SyncEngine;
563-
use crate::expressions::{BinaryExpressionOp, Scalar, VariadicExpressionOp};
563+
use crate::expressions::{BinaryExpressionOp, Scalar};
564564
use crate::log_replay::ActionsBatch;
565565
use crate::scan::state::ScanFile;
566566
use crate::scan::state_info::tests::{
@@ -761,17 +761,14 @@ mod tests {
761761
assert!(row_id_transform.is_replace);
762762
assert_eq!(row_id_transform.exprs.len(), 1);
763763
let expr = &row_id_transform.exprs[0];
764-
let expeceted_expr = Arc::new(Expr::variadic(
765-
VariadicExpressionOp::Coalesce,
766-
vec![
767-
Expr::column(["row_id_col"]),
768-
Expr::binary(
769-
BinaryExpressionOp::Plus,
770-
Expr::literal(42i64),
771-
Expr::column(["row_indexes_for_row_id_0"]),
772-
),
773-
],
774-
));
764+
let expeceted_expr = Arc::new(Expr::coalesce([
765+
Expr::column(["row_id_col"]),
766+
Expr::binary(
767+
BinaryExpressionOp::Plus,
768+
Expr::literal(42i64),
769+
Expr::column(["row_indexes_for_row_id_0"]),
770+
),
771+
]));
775772
assert_eq!(expr, &expeceted_expr);
776773
} else {
777774
panic!("Should have been a transform expression");

kernel/src/transforms.rs

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ use std::sync::Arc;
1010
use itertools::Itertools;
1111
use serde::{Deserialize, Serialize};
1212

13-
use crate::expressions::{
14-
BinaryExpressionOp, Expression, ExpressionRef, Scalar, Transform, VariadicExpressionOp,
15-
};
13+
use crate::expressions::{BinaryExpressionOp, Expression, ExpressionRef, Scalar, Transform};
1614
use crate::schema::{DataType, SchemaRef, StructType};
1715
use crate::table_features::ColumnMappingMode;
1816
use crate::{DeltaResult, Error};
@@ -137,17 +135,14 @@ pub(crate) fn get_transform_expr(
137135
let base_row_id = base_row_id.ok_or_else(|| {
138136
Error::generic("Asked to generate RowIds, but no baseRowId found.")
139137
})?;
140-
let expr = Arc::new(Expression::variadic(
141-
VariadicExpressionOp::Coalesce,
142-
vec![
143-
Expression::column([field_name]),
144-
Expression::binary(
145-
BinaryExpressionOp::Plus,
146-
Expression::literal(base_row_id),
147-
Expression::column([row_index_field_name]),
148-
),
149-
],
150-
));
138+
let expr = Arc::new(Expression::coalesce([
139+
Expression::column([field_name]),
140+
Expression::binary(
141+
BinaryExpressionOp::Plus,
142+
Expression::literal(base_row_id),
143+
Expression::column([row_index_field_name]),
144+
),
145+
]));
151146
transform.with_replaced_field(field_name.clone(), expr)
152147
}
153148
MetadataDerivedColumn {
@@ -592,17 +587,14 @@ mod tests {
592587
.expect("Should have row_id_col transform");
593588
assert!(row_id_transform.is_replace);
594589

595-
let expeceted_expr = Arc::new(Expression::variadic(
596-
VariadicExpressionOp::Coalesce,
597-
vec![
598-
Expression::column(["row_id_col"]),
599-
Expression::binary(
600-
BinaryExpressionOp::Plus,
601-
Expression::literal(4i64),
602-
Expression::column(["row_index_col"]),
603-
),
604-
],
605-
));
590+
let expeceted_expr = Arc::new(Expression::coalesce([
591+
Expression::column(["row_id_col"]),
592+
Expression::binary(
593+
BinaryExpressionOp::Plus,
594+
Expression::literal(4i64),
595+
Expression::column(["row_index_col"]),
596+
),
597+
]));
606598
assert_eq!(row_id_transform.exprs.len(), 1);
607599
let expr = &row_id_transform.exprs[0];
608600
assert_eq!(expr, &expeceted_expr);

0 commit comments

Comments
 (0)