fix: support fmin and fmax in interpreter on vector types (#12457)

tommyscholly · web-flow · commit 23c0f09d0ba1 · 2026-01-29T02:14:30.000Z
* fix: support `fmin` and `fmax` in interpreter

* add fmax-vector and fmin-vector tests
diff --git a/cranelift/filetests/filetests/runtests/fmax-vector.clif b/cranelift/filetests/filetests/runtests/fmax-vector.clif
@@ -0,0 +1,51 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 has_avx
+set enable_multi_ret_implicit_sret
+target riscv64 has_v
+target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
+
+function %fmax_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+    v2 = fmax v0, v1 ; per-lane maximum of the two f32x4 operands
+    return v2
+}
+; run: %fmax_f32x4([0x0.0 0x1.0 0x2.0 0x3.0], [0x0.0 0x1.0 0x2.0 0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]
+; run: %fmax_f32x4([0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13], [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13]
+; run: %fmax_f32x4([0x1.0 0x2.0 0x3.0 0x4.0], [0x0.5 0x3.0 0x2.5 0x5.0]) == [0x1.0 0x3.0 0x3.0 0x5.0]
+; run: %fmax_f32x4([-0x0.0 -0x0.0 -0x0.0 -0x0.0], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fmax_f32x4([+Inf +Inf +Inf +Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [+Inf +Inf +Inf +Inf]
+; run: %fmax_f32x4([-Inf -Inf -Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fmax_f32x4([+Inf -Inf +Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [+Inf 0x0.0 +Inf 0x0.0]
+
+; run: %fmax_f32x4([0x1.000002p-23 0x1.000000p-23 0x1.000002p-23 0x1.000000p-23], [0x1.000000p-23 0x1.000002p-23 0x1.000000p-23 0x1.000002p-23]) == [0x1.000002p-23 0x1.000002p-23 0x1.000002p-23 0x1.000002p-23]
+; run: %fmax_f32x4([0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127], [0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127]) == [0x1.fffffep127 0x1.fffffep127 0x1.fffffep127 0x1.fffffep127]
+; run: %fmax_f32x4([0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126], [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]) == [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]
+
+; run: %fmax_f32x4([0x0.800002p-126 0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126], [-0x0.800002p-126 -0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]) == [0x0.800002p-126 0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]
+
+function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmax v0, v1 ; per-lane maximum of the two f64x2 operands
+    return v2
+}
+; run: %fmax_f64x2([0x0.0 0x1.0], [0x0.0 0x1.0]) == [0x0.0 0x1.0]
+; run: %fmax_f64x2([0x1.0p10 0x1.0p11], [0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p11]
+; run: %fmax_f64x2([0x1.0 0x2.0], [0x0.5 0x3.0]) == [0x1.0 0x3.0]
+; run: %fmax_f64x2([-0x0.0 -0x0.0], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
+; run: %fmax_f64x2([+Inf +Inf], [0x0.0 0x0.0]) == [+Inf +Inf]
+; run: %fmax_f64x2([-Inf -Inf], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
+; run: %fmax_f64x2([+Inf -Inf], [0x0.0 0x0.0]) == [+Inf 0x0.0]
+
+; run: %fmax_f64x2([0x1.0000000000002p-52 0x1.0000000000000p-52], [0x1.0000000000000p-52 0x1.0000000000002p-52]) == [0x1.0000000000002p-52 0x1.0000000000002p-52]
+; run: %fmax_f64x2([0x1.ffffffffffffcp1023 0x1.fffffffffffffp1023], [0x1.fffffffffffffp1023 0x1.ffffffffffffcp1023]) == [0x1.fffffffffffffp1023 0x1.fffffffffffffp1023]
+; run: %fmax_f64x2([0x1.0000000000000p-1022 0x1.0000000000000p-1022], [0x1.0000000000000p-1022 0x1.0000000000000p-1022]) == [0x1.0000000000000p-1022 0x1.0000000000000p-1022]
+
+; run: %fmax_f64x2([0x0.8000000000002p-1022 0x0.8000000000000p-1022], [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]) == [0x0.8000000000002p-1022 0x0.8000000000000p-1022]
diff --git a/cranelift/filetests/filetests/runtests/fmin-vector.clif b/cranelift/filetests/filetests/runtests/fmin-vector.clif
@@ -0,0 +1,51 @@
+test interpret
+test run
+target aarch64
+target s390x
+target x86_64
+target x86_64 has_avx
+set enable_multi_ret_implicit_sret
+target riscv64 has_v
+target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
+
+function %fmin_f32x4(f32x4, f32x4) -> f32x4 {
+block0(v0: f32x4, v1: f32x4):
+    v2 = fmin v0, v1 ; per-lane minimum of the two f32x4 operands
+    return v2
+}
+; run: %fmin_f32x4([0x0.0 0x1.0 0x2.0 0x3.0], [0x0.0 0x1.0 0x2.0 0x3.0]) == [0x0.0 0x1.0 0x2.0 0x3.0]
+; run: %fmin_f32x4([0x1.0p10 0x1.0p11 0x1.0p12 0x1.0p13], [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p10 0x1.0p10 0x1.0p10]
+; run: %fmin_f32x4([0x1.0 0x2.0 0x3.0 0x4.0], [0x0.5 0x3.0 0x2.5 0x5.0]) == [0x0.5 0x2.0 0x2.5 0x4.0]
+; run: %fmin_f32x4([-0x0.0 -0x0.0 -0x0.0 -0x0.0], [0x0.0 0x0.0 0x0.0 0x0.0]) == [-0x0.0 -0x0.0 -0x0.0 -0x0.0]
+; run: %fmin_f32x4([+Inf +Inf +Inf +Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 0x0.0 0x0.0 0x0.0]
+; run: %fmin_f32x4([-Inf -Inf -Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [-Inf -Inf -Inf -Inf]
+; run: %fmin_f32x4([+Inf -Inf +Inf -Inf], [0x0.0 0x0.0 0x0.0 0x0.0]) == [0x0.0 -Inf 0x0.0 -Inf]
+
+; run: %fmin_f32x4([0x1.000002p-23 0x1.000000p-23 0x1.000002p-23 0x1.000000p-23], [0x1.000000p-23 0x1.000002p-23 0x1.000000p-23 0x1.000002p-23]) == [0x1.000000p-23 0x1.000000p-23 0x1.000000p-23 0x1.000000p-23]
+; run: %fmin_f32x4([0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127], [0x1.fffffep127 0x1.fffffcp127 0x1.fffffep127 0x1.fffffcp127]) == [0x1.fffffcp127 0x1.fffffcp127 0x1.fffffcp127 0x1.fffffcp127]
+; run: %fmin_f32x4([0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126], [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]) == [0x1.000000p-126 0x1.000000p-126 0x1.000000p-126 0x1.000000p-126]
+
+; run: %fmin_f32x4([0x0.800002p-126 0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126], [-0x0.800002p-126 -0x0.800000p-126 0x0.800002p-126 0x0.800000p-126]) == [-0x0.800002p-126 -0x0.800000p-126 -0x0.800002p-126 -0x0.800000p-126]
+
+function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
+block0(v0: f64x2, v1: f64x2):
+    v2 = fmin v0, v1 ; per-lane minimum of the two f64x2 operands
+    return v2
+}
+; run: %fmin_f64x2([0x0.0 0x1.0], [0x0.0 0x1.0]) == [0x0.0 0x1.0]
+; run: %fmin_f64x2([0x1.0p10 0x1.0p11], [0x1.0p10 0x1.0p10]) == [0x1.0p10 0x1.0p10]
+; run: %fmin_f64x2([0x1.0 0x2.0], [0x0.5 0x3.0]) == [0x0.5 0x2.0]
+; run: %fmin_f64x2([-0x0.0 -0x0.0], [0x0.0 0x0.0]) == [-0x0.0 -0x0.0]
+; run: %fmin_f64x2([+Inf +Inf], [0x0.0 0x0.0]) == [0x0.0 0x0.0]
+; run: %fmin_f64x2([-Inf -Inf], [0x0.0 0x0.0]) == [-Inf -Inf]
+; run: %fmin_f64x2([+Inf -Inf], [0x0.0 0x0.0]) == [0x0.0 -Inf]
+
+; run: %fmin_f64x2([0x1.0000000000002p-52 0x1.0000000000000p-52], [0x1.0000000000000p-52 0x1.0000000000002p-52]) == [0x1.0000000000000p-52 0x1.0000000000000p-52]
+; run: %fmin_f64x2([0x1.ffffffffffffcp1023 0x1.fffffffffffffp1023], [0x1.fffffffffffffp1023 0x1.ffffffffffffcp1023]) == [0x1.ffffffffffffcp1023 0x1.ffffffffffffcp1023]
+; run: %fmin_f64x2([0x1.0000000000000p-1022 0x1.0000000000000p-1022], [0x1.0000000000000p-1022 0x1.0000000000000p-1022]) == [0x1.0000000000000p-1022 0x1.0000000000000p-1022]
+
+; run: %fmin_f64x2([0x0.8000000000002p-1022 0x0.8000000000000p-1022], [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]) == [-0x0.8000000000002p-1022 -0x0.8000000000000p-1022]
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
@@ -886,20 +886,60 @@ where
         Opcode::Fneg => unary(DataValueExt::neg, arg(0))?,
         Opcode::Fabs => unary(DataValueExt::abs, arg(0))?,
         Opcode::Fcopysign => binary(DataValueExt::copysign, arg(0), arg(1))?,
-        Opcode::Fmin => assign(match (arg(0), arg(1)) {
-            (a, _) if a.is_nan()? => a,
-            (_, b) if b.is_nan()? => b,
-            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => a,
-            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => b,
-            (a, b) => a.smin(b)?,
-        }),
-        Opcode::Fmax => assign(match (arg(0), arg(1)) {
-            (a, _) if a.is_nan()? => a,
-            (_, b) if b.is_nan()? => b,
-            (a, b) if a.is_zero()? && b.is_zero()? && a.is_negative()? => b,
-            (a, b) if a.is_zero()? && b.is_zero()? && b.is_negative()? => a,
-            (a, b) => a.smax(b)?,
-        }),
+        Opcode::Fmin => {
+            // Minimum of a single pair of scalar floats. A NaN operand is returned as-is
+            // (the first operand is checked first); if both operands are zero and one is
+            // negative, that negative zero is returned; any other pair goes to `smin`.
+            let lane_min = |x: DataValue, y: DataValue| -> ValueResult<DataValue> {
+                Ok(match (x, y) {
+                    (x, _) if x.is_nan()? => x,
+                    (_, y) if y.is_nan()? => y,
+                    (x, y) if x.is_zero()? && y.is_zero()? && x.is_negative()? => x,
+                    (x, y) if x.is_zero()? && y.is_zero()? && y.is_negative()? => y,
+                    (x, y) => x.smin(y)?,
+                })
+            };
+
+            if ctrl_ty.is_vector() {
+                // Split both operands into lanes, reduce each lane pair, then repack.
+                let lhs = extractlanes(&arg(0), ctrl_ty)?;
+                let rhs = extractlanes(&arg(1), ctrl_ty)?;
+
+                let lanes = lhs.into_iter().zip(rhs).map(|(x, y)| lane_min(x, y));
+                let lanes = lanes.collect::<ValueResult<SimdVec<DataValue>>>()?;
+                assign(vectorizelanes(&lanes, ctrl_ty)?)
+            } else {
+                // Scalar operands are handled by the lane helper directly.
+                assign(lane_min(arg(0), arg(1))?)
+            }
+        }
+        Opcode::Fmax => {
+            // Maximum of a single pair of scalar floats. A NaN operand is returned as-is
+            // (the first operand is checked first); if both operands are zero and one is
+            // negative, the other operand is returned; any other pair goes to `smax`.
+            let lane_max = |x: DataValue, y: DataValue| -> ValueResult<DataValue> {
+                Ok(match (x, y) {
+                    (x, _) if x.is_nan()? => x,
+                    (_, y) if y.is_nan()? => y,
+                    (x, y) if x.is_zero()? && y.is_zero()? && x.is_negative()? => y,
+                    (x, y) if x.is_zero()? && y.is_zero()? && y.is_negative()? => x,
+                    (x, y) => x.smax(y)?,
+                })
+            };
+
+            if ctrl_ty.is_vector() {
+                // Split both operands into lanes, reduce each lane pair, then repack.
+                let lhs = extractlanes(&arg(0), ctrl_ty)?;
+                let rhs = extractlanes(&arg(1), ctrl_ty)?;
+
+                let lanes = lhs.into_iter().zip(rhs).map(|(x, y)| lane_max(x, y));
+                let lanes = lanes.collect::<ValueResult<SimdVec<DataValue>>>()?;
+                assign(vectorizelanes(&lanes, ctrl_ty)?)
+            } else {
+                // Scalar operands are handled by the lane helper directly.
+                assign(lane_max(arg(0), arg(1))?)
+            }
+        }
         Opcode::Ceil => unary(DataValueExt::ceil, arg(0))?,
         Opcode::Floor => unary(DataValueExt::floor, arg(0))?,
         Opcode::Trunc => unary(DataValueExt::trunc, arg(0))?,