Merge pull request #368 from Xilinx/jrickert.allow_lowering_of_int4_to_tosa

jorickert · web-flow · commit d40ef125a5c0 · 2025-06-23T08:25:16.000+02:00
Allow lowering of (u)int4 to tosa.
diff --git a/src/Conversion/ONNXToTOSA/Math/Elementwise.cpp b/src/Conversion/ONNXToTOSA/Math/Elementwise.cpp
@@ -378,6 +378,11 @@ class ONNXCastOpLoweringToTOSA : public OpConversionPattern<ONNXCastOp> {
     if (!inputTy) {
       return rewriter.notifyMatchFailure(op, "expected valid input type");
     }
+    if (isa<FloatType>(inputTy.getElementType()) &&
+        resultTy.getElementType().isUnsignedInteger()) {
+      return rewriter.notifyMatchFailure(
+          op, "TOSA does not support cast from float to unsigned integer");
+    }
     if (isa<FloatType>(inputTy.getElementType()) &&
         isa<IntegerType>(resultTy.getElementType())) {
       // ONNX.Cast has truncating behavior, and tosa.cast has rounds
diff --git a/src/Conversion/ONNXToTOSA/ONNXToTOSACommon.hpp b/src/Conversion/ONNXToTOSA/ONNXToTOSACommon.hpp
@@ -93,7 +93,8 @@ inline bool isTOSABool(mlir::Type type) {
 
 inline bool isTOSAInt(mlir::Type type) {
   mlir::IntegerType intType = mlir::dyn_cast<mlir::IntegerType>(type);
-  std::set<unsigned> intWidth{1, 8, 16, 32, 48, 64};
+  // Int 4 is not a tosa int, but supported by tosa.mlir
+  std::set<unsigned> intWidth{1, 4, 8, 16, 32, 48, 64};
   return intType && (intType.isSignless() || intType.isUnsignedInteger()) &&
          (intWidth.find(intType.getWidth()) != intWidth.end());
 }
diff --git a/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir b/test/mlir/conversion/onnx_to_tosa/Math/Elementwise.mlir
@@ -25,9 +25,37 @@ func.func @test_cast_int4_and_uint4_to_from_int8_uint8(%arg0: tensor<1xi4>, %arg
     %2 = "onnx.Cast"(%arg1) {saturate = 1 : si64, to = ui8} : (tensor<1xui4>) -> tensor<1xui8>
     %3 = "onnx.Cast"(%2) {saturate = 1 : si64, to = ui4} : (tensor<1xui8>) -> tensor<1xui4>
     onnx.Return %1, %3 : tensor<1xi4>, tensor<1xui4>
-    // CHECK-LABEL:   func.func @test_cast_int4_and_uint4_to_from_int8_uint8(
-    // TOSA does not support int4 casting
-    // CHECK-NOT: tosa.cast 
+// CHECK-LABEL:  func.func @test_cast_int4_and_uint4_to_from_int8_uint8
+// CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<1xi4>, [[PARAM_1_:%.+]]: tensor<1xui4>) -> (tensor<1xi4>, tensor<1xui4>) {
+// CHECK-DAG:       [[VAR_0_:%.+]] = tosa.cast [[PARAM_0_]] : (tensor<1xi4>) -> tensor<1xi8>
+// CHECK-DAG:       [[VAR_1_:%.+]] = tosa.cast [[VAR_0_]] : (tensor<1xi8>) -> tensor<1xi4>
+// CHECK-DAG:       [[VAR_2_:%.+]] = tosa.cast [[PARAM_1_]] : (tensor<1xui4>) -> tensor<1xui8>
+// CHECK-DAG:       [[VAR_3_:%.+]] = tosa.cast [[VAR_2_]] : (tensor<1xui8>) -> tensor<1xui4>
+// CHECK:           onnx.Return [[VAR_1_]], [[VAR_3_]] : tensor<1xi4>, tensor<1xui4>
+// CHECK:         }
+}
+
+// -----
+
+func.func @test_cast_int4_and_uint4_to_float_and_back(%arg0: tensor<1xi4>, %arg1: tensor<1xui4>) -> (tensor<1xi4>, tensor<1xui4>) {
+    %0 = "onnx.Cast"(%arg0) {saturate = 1 : si64, to = f32} : (tensor<1xi4>) -> tensor<1xf32>
+    %1 = "onnx.Cast"(%0) {saturate = 1 : si64, to = i4} : (tensor<1xf32>) -> tensor<1xi4>
+    %2 = "onnx.Cast"(%arg1) {saturate = 1 : si64, to = f32} : (tensor<1xui4>) -> tensor<1xf32>
+    %3 = "onnx.Cast"(%2) {saturate = 1 : si64, to = ui4} : (tensor<1xf32>) -> tensor<1xui4>
+    onnx.Return %1, %3 : tensor<1xi4>, tensor<1xui4>
+// CHECK-LABEL:  func.func @test_cast_int4_and_uint4_to_float_and_back
+// CHECK-SAME:   ([[PARAM_0_:%.+]]: tensor<1xi4>, [[PARAM_1_:%.+]]: tensor<1xui4>) -> (tensor<1xi4>, tensor<1xui4>) {
+// CHECK-DAG:       [[VAR_0_:%.+]] = tosa.cast [[PARAM_0_]] : (tensor<1xi4>) -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_1_:%.+]] = "tosa.const"() <{value = dense<0.000000e+00> : tensor<1xf32>}> : () -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_2_:%.+]] = tosa.greater_equal [[VAR_0_]], [[VAR_1_]] : (tensor<1xf32>, tensor<1xf32>) -> tensor<1xi1>
+// CHECK-DAG:       [[VAR_3_:%.+]] = tosa.floor [[VAR_0_]] : (tensor<1xf32>) -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_4_:%.+]] = tosa.ceil [[VAR_0_]] : (tensor<1xf32>) -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_5_:%.+]] = tosa.select [[VAR_2_]], [[VAR_3_]], [[VAR_4_]] : (tensor<1xi1>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_6_:%.+]] = tosa.cast [[VAR_5_]] : (tensor<1xf32>) -> tensor<1xi4>
+// CHECK-DAG:       [[VAR_7_:%.+]] = tosa.cast [[PARAM_1_]] : (tensor<1xui4>) -> tensor<1xf32>
+// CHECK-DAG:       [[VAR_8_:%.+]] = "onnx.Cast"([[VAR_7_]]) {saturate = 1 : si64, to = ui4} : (tensor<1xf32>) -> tensor<1xui4>
+// CHECK:           onnx.Return [[VAR_6_]], [[VAR_8_]] : tensor<1xi4>, tensor<1xui4>
+// CHECK:         }
 }
 
 // -----
diff --git a/test/mlir/conversion/onnx_to_tosa/Tensor/Constant.mlir b/test/mlir/conversion/onnx_to_tosa/Tensor/Constant.mlir
@@ -45,6 +45,24 @@ func.func @test_int_dense() -> tensor<2xi8> {
 
 // -----
 
+func.func @test_int4_dense() -> tensor<2xi4> {
+  %0 = "onnx.Constant"() {value = dense<[-1, -2]> : tensor<2xi4>} : () -> tensor<2xi4>
+  return %0 : tensor<2xi4>
+// CHECK-LABEL: @test_int4_dense() -> tensor<2xi4>
+// CHECK:       "tosa.const"() <{value = dense<[-1, -2]> : tensor<2xi4>}> : () -> tensor<2xi4>
+}
+
+// -----
+
+func.func @test_uint4_dense() -> tensor<2xui4> {
+  %0 = "onnx.Constant"() {value = dense<[1, 2]> : tensor<2xui4>} : () -> tensor<2xui4>
+  return %0 : tensor<2xui4>
+// CHECK-LABEL: @test_uint4_dense() -> tensor<2xui4>
+// CHECK:       "tosa.const"() <{value = dense<[1, 2]> : tensor<2xui4>}> : () -> tensor<2xui4>
+}
+
+// -----
+
 func.func @test_bool_single() -> tensor<i1> {
   %0 = "onnx.Constant"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
   return %0 : tensor<i1>

Original file line number	Diff line number	Diff line change
`@@ -93,7 +93,8 @@ inline bool isTOSABool(mlir::Type type) {`
`93`	`93`
`94`	`94`	`inline bool isTOSAInt(mlir::Type type) {`
`95`	`95`	`mlir::IntegerType intType = mlir::dyn_cast<mlir::IntegerType>(type);`
`96`		`- std::set<unsigned> intWidth{1, 8, 16, 32, 48, 64};`
	`96`	`+ // Int 4 is not a tosa int, but supported by tosa.mlir`
	`97`	`+ std::set<unsigned> intWidth{1, 4, 8, 16, 32, 48, 64};`
`97`	`98`	`return intType && (intType.isSignless() || intType.isUnsignedInteger()) &&`
`98`	`99`	`(intWidth.find(intType.getWidth()) != intWidth.end());`
`99`	`100`	`}`