Skip to content

Commit 4570151

Browse files
authored
Merge branch 'main' into fix-approx-percentile-cont-with-weight
2 parents 614c49f + f0e38df commit 4570151

File tree

16 files changed

+187
-138
lines changed

16 files changed

+187
-138
lines changed

.github/workflows/docs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
path: asf-site
4242

4343
- name: Setup Python
44-
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
44+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
4545
with:
4646
python-version: "3.12"
4747

.github/workflows/docs_pr.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ jobs:
4545
submodules: true
4646
fetch-depth: 1
4747
- name: Setup Python
48-
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
48+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
4949
with:
5050
python-version: "3.12"
5151
- name: Install doc dependencies

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ async-trait = "0.1.89"
108108
bigdecimal = "0.4.8"
109109
bytes = "1.11"
110110
bzip2 = "0.6.1"
111-
chrono = { version = "0.4.42", default-features = false }
111+
chrono = { version = "0.4.43", default-features = false }
112112
criterion = "0.8"
113113
ctor = "0.6.3"
114114
dashmap = "6.0.1"

datafusion/core/src/execution/session_state.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1843,9 +1843,14 @@ impl ContextProvider for SessionContextProvider<'_> {
18431843
self.state.execution_props().query_execution_start_time,
18441844
);
18451845
let simplifier = ExprSimplifier::new(simplify_context);
1846+
let schema = DFSchema::empty();
18461847
let args = args
18471848
.into_iter()
1848-
.map(|arg| simplifier.simplify(arg))
1849+
.map(|arg| {
1850+
simplifier
1851+
.coerce(arg, &schema)
1852+
.and_then(|e| simplifier.simplify(e))
1853+
})
18491854
.collect::<datafusion_common::Result<Vec<_>>>()?;
18501855
let provider = tbl_func.create_table_provider(&args)?;
18511856

datafusion/core/tests/user_defined/user_defined_table_functions.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,31 @@ impl TableFunctionImpl for SimpleCsvTableFunc {
221221
}
222222
}
223223

224+
/// Test that expressions passed to UDTFs are properly type-coerced
225+
/// This is a regression test for https://github.com/apache/datafusion/issues/19914
226+
#[tokio::test]
227+
async fn test_udtf_type_coercion() -> Result<()> {
228+
use datafusion::datasource::MemTable;
229+
230+
#[derive(Debug)]
231+
struct NoOpTableFunc;
232+
233+
impl TableFunctionImpl for NoOpTableFunc {
234+
fn call(&self, _: &[Expr]) -> Result<Arc<dyn TableProvider>> {
235+
let schema = Arc::new(arrow::datatypes::Schema::empty());
236+
Ok(Arc::new(MemTable::try_new(schema, vec![vec![]])?))
237+
}
238+
}
239+
240+
let ctx = SessionContext::new();
241+
ctx.register_udtf("f", Arc::new(NoOpTableFunc));
242+
243+
// This should not panic - the array elements should be coerced to Float64
244+
let _ = ctx.sql("SELECT * FROM f(ARRAY[0.1, 1, 2])").await?;
245+
246+
Ok(())
247+
}
248+
224249
fn read_csv_batches(csv_path: impl AsRef<Path>) -> Result<(SchemaRef, Vec<RecordBatch>)> {
225250
let mut file = File::open(csv_path)?;
226251
let (schema, _) = Format::default()

datafusion/expr/src/logical_plan/builder.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2779,12 +2779,12 @@ mod tests {
27792779

27802780
assert_snapshot!(plan, @r"
27812781
Union
2782-
Cross Join:
2782+
Cross Join:
27832783
SubqueryAlias: left
27842784
Values: (Int32(1))
27852785
SubqueryAlias: right
27862786
Values: (Int32(1))
2787-
Cross Join:
2787+
Cross Join:
27882788
SubqueryAlias: left
27892789
Values: (Int32(1))
27902790
SubqueryAlias: right

datafusion/expr/src/logical_plan/plan.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,13 +1973,16 @@ impl LogicalPlan {
19731973
};
19741974
match join_constraint {
19751975
JoinConstraint::On => {
1976-
write!(
1977-
f,
1978-
"{} Join: {}{}",
1979-
join_type,
1980-
join_expr.join(", "),
1981-
filter_expr
1982-
)
1976+
write!(f, "{join_type} Join:",)?;
1977+
if !join_expr.is_empty() || !filter_expr.is_empty() {
1978+
write!(
1979+
f,
1980+
" {}{}",
1981+
join_expr.join(", "),
1982+
filter_expr
1983+
)?;
1984+
}
1985+
Ok(())
19831986
}
19841987
JoinConstraint::Using => {
19851988
write!(

datafusion/functions/benches/iszero.rs

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use arrow::{
2323
util::bench_util::create_primitive_array,
2424
};
2525
use criterion::{Criterion, criterion_group, criterion_main};
26+
use datafusion_common::ScalarValue;
2627
use datafusion_common::config::ConfigOptions;
2728
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2829
use datafusion_functions::math::iszero;
@@ -31,6 +32,8 @@ use std::sync::Arc;
3132

3233
fn criterion_benchmark(c: &mut Criterion) {
3334
let iszero = iszero();
35+
let config_options = Arc::new(ConfigOptions::default());
36+
3437
for size in [1024, 4096, 8192] {
3538
let f32_array = Arc::new(create_primitive_array::<Float32Type>(size, 0.2));
3639
let batch_len = f32_array.len();
@@ -43,7 +46,6 @@ fn criterion_benchmark(c: &mut Criterion) {
4346
})
4447
.collect::<Vec<_>>();
4548
let return_field = Arc::new(Field::new("f", DataType::Boolean, true));
46-
let config_options = Arc::new(ConfigOptions::default());
4749

4850
c.bench_function(&format!("iszero f32 array: {size}"), |b| {
4951
b.iter(|| {
@@ -60,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) {
6062
)
6163
})
6264
});
65+
6366
let f64_array = Arc::new(create_primitive_array::<Float64Type>(size, 0.2));
6467
let batch_len = f64_array.len();
6568
let f64_args = vec![ColumnarValue::Array(f64_array)];
@@ -88,6 +91,46 @@ fn criterion_benchmark(c: &mut Criterion) {
8891
})
8992
});
9093
}
94+
95+
// Scalar benchmarks - run once since size doesn't affect scalar performance
96+
let scalar_f32_args = vec![ColumnarValue::Scalar(ScalarValue::Float32(Some(1.0)))];
97+
let scalar_f32_arg_fields = vec![Field::new("a", DataType::Float32, false).into()];
98+
let return_field_scalar = Arc::new(Field::new("f", DataType::Boolean, false));
99+
100+
c.bench_function("iszero f32 scalar", |b| {
101+
b.iter(|| {
102+
black_box(
103+
iszero
104+
.invoke_with_args(ScalarFunctionArgs {
105+
args: scalar_f32_args.clone(),
106+
arg_fields: scalar_f32_arg_fields.clone(),
107+
number_rows: 1,
108+
return_field: Arc::clone(&return_field_scalar),
109+
config_options: Arc::clone(&config_options),
110+
})
111+
.unwrap(),
112+
)
113+
})
114+
});
115+
116+
let scalar_f64_args = vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(1.0)))];
117+
let scalar_f64_arg_fields = vec![Field::new("a", DataType::Float64, false).into()];
118+
119+
c.bench_function("iszero f64 scalar", |b| {
120+
b.iter(|| {
121+
black_box(
122+
iszero
123+
.invoke_with_args(ScalarFunctionArgs {
124+
args: scalar_f64_args.clone(),
125+
arg_fields: scalar_f64_arg_fields.clone(),
126+
number_rows: 1,
127+
return_field: Arc::clone(&return_field_scalar),
128+
config_options: Arc::clone(&config_options),
129+
})
130+
.unwrap(),
131+
)
132+
})
133+
});
91134
}
92135

93136
criterion_group!(benches, criterion_benchmark);

datafusion/functions/src/math/iszero.rs

Lines changed: 46 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,20 @@
1818
use std::any::Any;
1919
use std::sync::Arc;
2020

21-
use arrow::array::{ArrayRef, ArrowNativeTypeOp, AsArray, BooleanArray};
21+
use arrow::array::{ArrowNativeTypeOp, AsArray, BooleanArray};
2222
use arrow::datatypes::DataType::{Boolean, Float16, Float32, Float64};
2323
use arrow::datatypes::{DataType, Float16Type, Float32Type, Float64Type};
2424

2525
use datafusion_common::types::NativeType;
26-
use datafusion_common::{Result, ScalarValue, exec_err};
26+
use datafusion_common::utils::take_function_args;
27+
use datafusion_common::{Result, ScalarValue, internal_err};
2728
use datafusion_expr::{Coercion, TypeSignatureClass};
2829
use datafusion_expr::{
2930
ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
3031
Volatility,
3132
};
3233
use datafusion_macros::user_doc;
3334

34-
use crate::utils::make_scalar_function;
35-
3635
#[user_doc(
3736
doc_section(label = "Math Functions"),
3837
description = "Returns true if a given number is +0.0 or -0.0 otherwise returns false.",
@@ -90,79 +89,53 @@ impl ScalarUDFImpl for IsZeroFunc {
9089
}
9190

9291
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
93-
// Handle NULL input
94-
if args.args[0].data_type().is_null() {
95-
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
92+
let [arg] = take_function_args(self.name(), args.args)?;
93+
94+
match arg {
95+
ColumnarValue::Scalar(scalar) => {
96+
if scalar.is_null() {
97+
return Ok(ColumnarValue::Scalar(ScalarValue::Boolean(None)));
98+
}
99+
100+
match scalar {
101+
ScalarValue::Float64(Some(v)) => {
102+
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0))))
103+
}
104+
ScalarValue::Float32(Some(v)) => {
105+
Ok(ColumnarValue::Scalar(ScalarValue::Boolean(Some(v == 0.0))))
106+
}
107+
ScalarValue::Float16(Some(v)) => Ok(ColumnarValue::Scalar(
108+
ScalarValue::Boolean(Some(v.is_zero())),
109+
)),
110+
_ => {
111+
internal_err!(
112+
"Unexpected scalar type for iszero: {:?}",
113+
scalar.data_type()
114+
)
115+
}
116+
}
117+
}
118+
ColumnarValue::Array(array) => match array.data_type() {
119+
Float64 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
120+
array.as_primitive::<Float64Type>(),
121+
|x| x == 0.0,
122+
)))),
123+
Float32 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
124+
array.as_primitive::<Float32Type>(),
125+
|x| x == 0.0,
126+
)))),
127+
Float16 => Ok(ColumnarValue::Array(Arc::new(BooleanArray::from_unary(
128+
array.as_primitive::<Float16Type>(),
129+
|x| x.is_zero(),
130+
)))),
131+
other => {
132+
internal_err!("Unexpected data type {other:?} for function iszero")
133+
}
134+
},
96135
}
97-
make_scalar_function(iszero, vec![])(&args.args)
98136
}
99137

100138
fn documentation(&self) -> Option<&Documentation> {
101139
self.doc()
102140
}
103141
}
104-
105-
/// Iszero SQL function
106-
fn iszero(args: &[ArrayRef]) -> Result<ArrayRef> {
107-
match args[0].data_type() {
108-
Float64 => Ok(Arc::new(BooleanArray::from_unary(
109-
args[0].as_primitive::<Float64Type>(),
110-
|x| x == 0.0,
111-
)) as ArrayRef),
112-
113-
Float32 => Ok(Arc::new(BooleanArray::from_unary(
114-
args[0].as_primitive::<Float32Type>(),
115-
|x| x == 0.0,
116-
)) as ArrayRef),
117-
118-
Float16 => Ok(Arc::new(BooleanArray::from_unary(
119-
args[0].as_primitive::<Float16Type>(),
120-
|x| x.is_zero(),
121-
)) as ArrayRef),
122-
123-
other => exec_err!("Unsupported data type {other:?} for function iszero"),
124-
}
125-
}
126-
127-
#[cfg(test)]
128-
mod test {
129-
use std::sync::Arc;
130-
131-
use arrow::array::{ArrayRef, Float32Array, Float64Array};
132-
133-
use datafusion_common::cast::as_boolean_array;
134-
135-
use crate::math::iszero::iszero;
136-
137-
#[test]
138-
fn test_iszero_f64() {
139-
let args: Vec<ArrayRef> =
140-
vec![Arc::new(Float64Array::from(vec![1.0, 0.0, 3.0, -0.0]))];
141-
142-
let result = iszero(&args).expect("failed to initialize function iszero");
143-
let booleans =
144-
as_boolean_array(&result).expect("failed to initialize function iszero");
145-
146-
assert_eq!(booleans.len(), 4);
147-
assert!(!booleans.value(0));
148-
assert!(booleans.value(1));
149-
assert!(!booleans.value(2));
150-
assert!(booleans.value(3));
151-
}
152-
153-
#[test]
154-
fn test_iszero_f32() {
155-
let args: Vec<ArrayRef> =
156-
vec![Arc::new(Float32Array::from(vec![1.0, 0.0, 3.0, -0.0]))];
157-
158-
let result = iszero(&args).expect("failed to initialize function iszero");
159-
let booleans =
160-
as_boolean_array(&result).expect("failed to initialize function iszero");
161-
162-
assert_eq!(booleans.len(), 4);
163-
assert!(!booleans.value(0));
164-
assert!(booleans.value(1));
165-
assert!(!booleans.value(2));
166-
assert!(booleans.value(3));
167-
}
168-
}

0 commit comments

Comments
 (0)