Skip to content

Commit 23c0f09

Browse files
authored
fix: support fmin and fmax in interpreter on vector types (#12457)
* fix: support `fmin` and `fmax` in interpreter
* add fmax-vector and fmin-vector tests
1 parent 813c44a commit 23c0f09

File tree

3 files changed

+156
-14
lines changed

3 files changed

+156
-14
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
test interpret
2+
test run
3+
target aarch64
4+
target s390x
5+
target x86_64
6+
target x86_64 has_avx
7+
set enable_multi_ret_implicit_sret
8+
target riscv64 has_v
9+
target riscv64 has_v has_c has_zcb
10+
target pulley32
11+
target pulley32be
12+
target pulley64
13+
target pulley64be
14+
15+
function %fmax_f32x4(f32x4, f32x4) -> f32x4 {
16+
block0(v0: f32x4, v1: f32x4):
17+
v2 = fmax v0, v1
18+
return v2
19+
}
20+
; run: %fmax_f32x4([0x0.0 0x1.0 0x2.0 0x3.0], [0x0.0 0x1.0 0x2.0 0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]
21+
; run: %fmax_f32x4([0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13], [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13]
22+
; run: %fmax_f32x4([0x1.0 0x2.0 0x3.0 0x4.0], [0x0.5 0x3.0 0x2.5 0x5.0]) == [0x1.0 0x3.0 0x3.0 0x5.0]
23+
; run: %fmax_f32x4([-0x0.0 -0x0.0 -0x0.0 -0x0.0], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
24+
; run: %fmax_f32x4([+Inf +Inf +Inf +Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [+Inf +Inf +Inf +Inf]
25+
; run: %fmax_f32x4([-Inf -Inf -Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
26+
; run: %fmax_f32x4([+Inf -Inf +Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [+Inf 0x0.0 +Inf 0x0.0]
27+
28+
; run: %fmax_f32x4([0x1.000002p-23 0x1.000000p-23 0x1.000002p-23 0x1.000000p-23], [0x1.000000p-23 0x1.000002p-23 0x1.000000p-23 0x1.000002p-23]) == [0x1.000002p-23 0x1.000002p-23 0x1.000002p-23 0x1.000002p-23]
29+
; run: %fmax_f32x4([0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127], [0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127]) == [0x1.fffffep127 0x1.fffffep127 0x1.fffffep127 0x1.fffffep127]
30+
; run: %fmax_f32x4([0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126], [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]) == [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]
31+
32+
; run: %fmax_f32x4([0x0.800002p-126 0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126], [-0x0.800002p-126 -0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]) == [0x0.800002p-126 0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]
33+
34+
function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
35+
block0(v0: f64x2, v1: f64x2):
36+
v2 = fmax v0, v1
37+
return v2
38+
}
39+
; run: %fmax_f64x2([0x0.0 0x1.0], [0x0.0 0x1.0]) == [0x0.0 0x1.0]
40+
; run: %fmax_f64x2([0x1.0p10 0x1.0p11], [0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p11]
41+
; run: %fmax_f64x2([0x1.0 0x2.0], [0x0.5 0x3.0]) == [0x1.0 0x3.0]
42+
; run: %fmax_f64x2([-0x0.0 -0x0.0], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
43+
; run: %fmax_f64x2([+Inf +Inf], [0x0.0 0x0.0]) == [+Inf +Inf]
44+
; run: %fmax_f64x2([-Inf -Inf], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
45+
; run: %fmax_f64x2([+Inf -Inf], [0x0.0 0x0.0]) == [+Inf 0x0.0]
46+
47+
; run: %fmax_f64x2([0x1.0000000000002p-52 0x1.0000000000000p-52], [0x1.0000000000000p-52 0x1.0000000000002p-52]) == [0x1.0000000000002p-52 0x1.0000000000002p-52]
48+
; run: %fmax_f64x2([0x1.ffffffffffffcp1023 0x1.fffffffffffffp1023], [0x1.fffffffffffffp1023 0x1.ffffffffffffcp1023]) == [0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
49+
; run: %fmax_f64x2([0x1.0000000000000p-1022 0x1.0000000000000p-1022], [0x1.0000000000000p-1022 0x1.0000000000000p-1022]) == [0x1.0000000000000p-1022 0x1.0000000000000p-1022]
50+
51+
; run: %fmax_f64x2([0x0.8000000000002p-1022 0x0.8000000000000p-1022], [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]) == [0x0.8000000000002p-1022 0x0.8000000000000p-1022]
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
test interpret
2+
test run
3+
target aarch64
4+
target s390x
5+
target x86_64
6+
target x86_64 has_avx
7+
set enable_multi_ret_implicit_sret
8+
target riscv64 has_v
9+
target riscv64 has_v has_c has_zcb
10+
target pulley32
11+
target pulley32be
12+
target pulley64
13+
target pulley64be
14+
15+
function %fmin_f32x4(f32x4, f32x4) -> f32x4 {
16+
block0(v0: f32x4, v1: f32x4):
17+
v2 = fmin v0, v1
18+
return v2
19+
}
20+
; run: %fmin_f32x4([0x0.0 0x1.0 0x2.0 0x3.0], [0x0.0 0x1.0 0x2.0 0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]
21+
; run: %fmin_f32x4([0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13], [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]
22+
; run: %fmin_f32x4([0x1.0 0x2.0 0x3.0 0x4.0], [0x0.5 0x3.0 0x2.5 0x5.0]) == [0x0.5 0x2.0 0x2.5 0x4.0]
23+
; run: %fmin_f32x4([-0x0.0 -0x0.0 -0x0.0 -0x0.0], [0x0.0 0x0.0 0x0.0 0x0.0]) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
24+
; run: %fmin_f32x4([+Inf +Inf +Inf +Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
25+
; run: %fmin_f32x4([-Inf -Inf -Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [-Inf -Inf -Inf -Inf]
26+
; run: %fmin_f32x4([+Inf -Inf +Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 -Inf 0x0.0 -Inf]
27+
28+
; run: %fmin_f32x4([0x1.000002p-23 0x1.000000p-23 0x1.000002p-23 0x1.000000p-23], [0x1.000000p-23 0x1.000002p-23 0x1.000000p-23 0x1.000002p-23]) == [0x1.000000p-23 0x1.000000p-23 0x1.000000p-23 0x1.000000p-23]
29+
; run: %fmin_f32x4([0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127], [0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127]) == [0x1.fffffcp127 0x1.fffffcp127 0x1.fffffcp127 0x1.fffffcp127]
30+
; run: %fmin_f32x4([0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126], [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]) == [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]
31+
32+
; run: %fmin_f32x4([0x0.800002p-126 0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126], [-0x0.800002p-126 -0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]) == [-0x0.800002p-126 -0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126]
33+
34+
function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
35+
block0(v0: f64x2, v1: f64x2):
36+
v2 = fmin v0, v1
37+
return v2
38+
}
39+
; run: %fmin_f64x2([0x0.0 0x1.0], [0x0.0 0x1.0]) == [0x0.0 0x1.0]
40+
; run: %fmin_f64x2([0x1.0p10 0x1.0p11], [0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p10]
41+
; run: %fmin_f64x2([0x1.0 0x2.0], [0x0.5 0x3.0]) == [0x0.5 0x2.0]
42+
; run: %fmin_f64x2([-0x0.0 -0x0.0], [0x0.0 0x0.0]) == [-0x0.0 -0x0.0]
43+
; run: %fmin_f64x2([+Inf +Inf], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
44+
; run: %fmin_f64x2([-Inf -Inf], [0x0.0 0x0.0]) == [-Inf -Inf]
45+
; run: %fmin_f64x2([+Inf -Inf], [0x0.0 0x0.0]) == [0x0.0 -Inf]
46+
47+
; run: %fmin_f64x2([0x1.0000000000002p-52 0x1.0000000000000p-52], [0x1.0000000000000p-52 0x1.0000000000002p-52]) == [0x1.0000000000000p-52 0x1.0000000000000p-52]
48+
; run: %fmin_f64x2([0x1.ffffffffffffcp1023 0x1.fffffffffffffp1023], [0x1.fffffffffffffp1023 0x1.ffffffffffffcp1023]) == [0x1.ffffffffffffcp1023 0x1.ffffffffffffcp1023]
49+
; run: %fmin_f64x2([0x1.0000000000000p-1022 0x1.0000000000000p-1022], [0x1.0000000000000p-1022 0x1.0000000000000p-1022]) == [0x1.0000000000000p-1022 0x1.0000000000000p-1022]
50+
51+
; run: %fmin_f64x2([0x0.8000000000002p-1022 0x0.8000000000000p-1022], [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]) == [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]

cranelift/interpreter/src/step.rs

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -886,20 +886,60 @@ where
886886
Opcode::Fneg => unary(DataValueExt::neg, arg(0))?,
887887
Opcode::Fabs => unary(DataValueExt::abs, arg(0))?,
888888
Opcode::Fcopysign => binary(DataValueExt::copysign, arg(0), arg(1))?,
889-
Opcode::Fmin => assign(match (arg(0), arg(1)) {
890-
(a, _) if a.is_nan()? => a,
891-
(_, b) if b.is_nan()? => b,
892-
(a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a,
893-
(a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b,
894-
(a, b) => a.smin(b)?,
895-
}),
896-
Opcode::Fmax => assign(match (arg(0), arg(1)) {
897-
(a, _) if a.is_nan()? => a,
898-
(_, b) if b.is_nan()? => b,
899-
(a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b,
900-
(a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a,
901-
(a, b) => a.smax(b)?,
902-
}),
889+
Opcode::Fmin => {
890+
let scalar_min = |a: DataValue, b: DataValue| -> ValueResult<DataValue> {
891+
Ok(match (a, b) {
892+
(a, _) if a.is_nan()? => a,
893+
(_, b) if b.is_nan()? => b,
894+
(a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a,
895+
(a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b,
896+
(a, b) => a.smin(b)?,
897+
})
898+
};
899+
900+
if ctrl_ty.is_vector() {
901+
let arg0 = extractlanes(&arg(0), ctrl_ty)?;
902+
let arg1 = extractlanes(&arg(1), ctrl_ty)?;
903+
904+
assign(vectorizelanes(
905+
&(arg0
906+
.into_iter()
907+
.zip(arg1.into_iter())
908+
.map(|(a, b)| scalar_min(a, b))
909+
.collect::<ValueResult<SimdVec<DataValue>>>()?),
910+
ctrl_ty,
911+
)?)
912+
} else {
913+
assign(scalar_min(arg(0), arg(1))?)
914+
}
915+
}
916+
Opcode::Fmax => {
917+
let scalar_max = |a: DataValue, b: DataValue| -> ValueResult<DataValue> {
918+
Ok(match (a, b) {
919+
(a, _) if a.is_nan()? => a,
920+
(_, b) if b.is_nan()? => b,
921+
(a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b,
922+
(a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a,
923+
(a, b) => a.smax(b)?,
924+
})
925+
};
926+
927+
if ctrl_ty.is_vector() {
928+
let arg0 = extractlanes(&arg(0), ctrl_ty)?;
929+
let arg1 = extractlanes(&arg(1), ctrl_ty)?;
930+
931+
assign(vectorizelanes(
932+
&(arg0
933+
.into_iter()
934+
.zip(arg1.into_iter())
935+
.map(|(a, b)| scalar_max(a, b))
936+
.collect::<ValueResult<SimdVec<DataValue>>>()?),
937+
ctrl_ty,
938+
)?)
939+
} else {
940+
assign(scalar_max(arg(0), arg(1))?)
941+
}
942+
}
903943
Opcode::Ceil => unary(DataValueExt::ceil, arg(0))?,
904944
Opcode::Floor => unary(DataValueExt::floor, arg(0))?,
905945
Opcode::Trunc => unary(DataValueExt::trunc, arg(0))?,

0 commit comments

Comments
 (0)