
Commit 686df2d

Add INT32 support to SUB (#3037)
- Add INT32 support in sub
- Add TFLite tests in sub_test.cc

bug=fixes #2720
1 parent 0bf2956 commit 686df2d

File tree

tensorflow/lite/micro/kernels/sub.cc
tensorflow/lite/micro/kernels/sub_common.cc
tensorflow/lite/micro/kernels/sub_test.cc

3 files changed: +115, -30 lines changed

tensorflow/lite/micro/kernels/sub.cc

Lines changed: 66 additions & 28 deletions
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -36,39 +36,76 @@ void* SubInit(TfLiteContext* context, const char* buffer, size_t length) {
   return context->AllocatePersistentBuffer(context, sizeof(OpDataSub));
 }
 
-void EvalSub(TfLiteContext* context, TfLiteNode* node, TfLiteSubParams* params,
-             const OpDataSub* data, const TfLiteEvalTensor* input1,
-             const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
-  float output_activation_min, output_activation_max;
-  CalculateActivationRange(params->activation, &output_activation_min,
-                           &output_activation_max);
-  tflite::ArithmeticParams op_params;
-  SetActivationParams(output_activation_min, output_activation_max, &op_params);
-  if (data->requires_broadcast) {
-    tflite::reference_ops::BroadcastSubSlow(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
-  } else {
-    tflite::reference_ops::SubWithActivation(
-        op_params, tflite::micro::GetTensorShape(input1),
-        tflite::micro::GetTensorData<float>(input1),
-        tflite::micro::GetTensorShape(input2),
-        tflite::micro::GetTensorData<float>(input2),
-        tflite::micro::GetTensorShape(output),
-        tflite::micro::GetTensorData<float>(output));
+TfLiteStatus EvalSub(TfLiteContext* context, TfLiteNode* node,
+                     TfLiteSubParams* params, const OpDataSub* data,
+                     const TfLiteEvalTensor* input1,
+                     const TfLiteEvalTensor* input2, TfLiteEvalTensor* output) {
+  switch (output->type) {
+    case kTfLiteFloat32: {
+      float output_activation_min, output_activation_max;
+      CalculateActivationRange(params->activation, &output_activation_min,
+                               &output_activation_max);
+      tflite::ArithmeticParams op_params;
+      SetActivationParams(output_activation_min, output_activation_max,
+                          &op_params);
+      if (data->requires_broadcast) {
+        tflite::reference_ops::BroadcastSubSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<float>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<float>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      } else {
+        tflite::reference_ops::SubWithActivation(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<float>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<float>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<float>(output));
+      }
+    } break;
+    case kTfLiteInt32: {
+      int32_t output_activation_min, output_activation_max;
+      CalculateActivationRange(params->activation, &output_activation_min,
+                               &output_activation_max);
+      tflite::ArithmeticParams op_params;
+      SetActivationParams(output_activation_min, output_activation_max,
+                          &op_params);
+      if (data->requires_broadcast) {
+        tflite::reference_ops::BroadcastSubSlow(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int32_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int32_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int32_t>(output));
+      } else {
+        tflite::reference_ops::SubWithActivation(
+            op_params, tflite::micro::GetTensorShape(input1),
+            tflite::micro::GetTensorData<int32_t>(input1),
+            tflite::micro::GetTensorShape(input2),
+            tflite::micro::GetTensorData<int32_t>(input2),
+            tflite::micro::GetTensorShape(output),
+            tflite::micro::GetTensorData<int32_t>(output));
+      }
+    } break;
+    default:
+      MicroPrintf("Type %s (%d) not supported.",
+                  TfLiteTypeGetName(output->type), output->type);
+      return kTfLiteError;
   }
+
+  return kTfLiteOk;
 }
 
 TfLiteStatus EvalSubQuantized(TfLiteContext* context, TfLiteNode* node,
                               TfLiteSubParams* params, const OpDataSub* data,
                               const TfLiteEvalTensor* input1,
                               const TfLiteEvalTensor* input2,
                               TfLiteEvalTensor* output) {
-  tflite::ArithmeticParams op_params;
+  tflite::ArithmeticParams op_params = {};
   op_params.left_shift = data->left_shift;
   op_params.input1_offset = data->input1_offset;
   op_params.input1_multiplier = data->input1_multiplier;
@@ -147,8 +184,9 @@ TfLiteStatus SubEval(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->user_data != nullptr);
   const OpDataSub& data = *(static_cast<const OpDataSub*>(node->user_data));
 
-  if (output->type == kTfLiteFloat32) {
-    EvalSub(context, node, params, &data, input1, input2, output);
+  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+    TF_LITE_ENSURE_OK(
+        context, EvalSub(context, node, params, &data, input1, input2, output));
   } else if (output->type == kTfLiteInt8 || output->type == kTfLiteInt16) {
     TF_LITE_ENSURE_OK(context, EvalSubQuantized(context, node, params, &data,
                                                 input1, input2, output));
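Note on the new EvalSub: the float and INT32 branches are structurally identical; only the template argument to tflite::micro::GetTensorData<T>() changes, and both branches feed the same reference kernels. As a rough illustration of what reference_ops::SubWithActivation computes on the INT32 path, here is a minimal standalone sketch (the helper name is made up; this is not the TFLM implementation):

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>

// Sketch of the elementwise loop behind an int32_t sub-with-activation:
// output[i] = clamp(input1[i] - input2[i], activation_min, activation_max).
void SubWithActivationSketch(const int32_t* input1, const int32_t* input2,
                             int32_t* output, int flat_size,
                             int32_t activation_min, int32_t activation_max) {
  for (int i = 0; i < flat_size; ++i) {
    output[i] = std::min(std::max(input1[i] - input2[i], activation_min),
                         activation_max);
  }
}

int main() {
  const int32_t input1[] = {-2, 10, 7};
  const int32_t input2[] = {3, 4, 7};
  int32_t output[3];
  // kTfLiteActNone corresponds to the full int32_t range, so no clamping.
  SubWithActivationSketch(input1, input2, output, 3, INT32_MIN, INT32_MAX);
  for (int32_t v : output) {
    printf("%" PRId32 "\n", v);  // prints -5, 6, 0
  }
  return 0;
}

For kTfLiteActNone, CalculateActivationRange produces the widest representable range for the type, so the clamp above is effectively a no-op; fused activations such as kTfLiteActRelu would narrow it.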

tensorflow/lite/micro/kernels/sub_common.cc

Lines changed: 11 additions & 1 deletion
@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -98,6 +98,16 @@ TfLiteStatus SubPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_STATUS(
       CalculateOpDataSub(context, params, input1, input2, output, data));
 
+  if (output->type == kTfLiteInt32) {
+    // Only support INT32 unquantized SUB for now.
+    TF_LITE_ENSURE_EQ(context, input1->quantization.type,
+                      kTfLiteNoQuantization);
+    TF_LITE_ENSURE_EQ(context, input2->quantization.type,
+                      kTfLiteNoQuantization);
+    TF_LITE_ENSURE_EQ(context, output->quantization.type,
+                      kTfLiteNoQuantization);
+  }
+
   micro_context->DeallocateTempTfLiteTensor(input1);
   micro_context->DeallocateTempTfLiteTensor(input2);
   micro_context->DeallocateTempTfLiteTensor(output);
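Why this guard matters: an INT32 tensor in a .tflite model can still carry quantization parameters, but the new EvalSub path performs plain integer arithmetic with no rescaling, so quantized INT32 tensors must be rejected at prepare time. A hedged sketch of the same check factored into a helper (hypothetical function, not part of the TFLM API):

#include "tensorflow/lite/c/common.h"

// Hypothetical helper mirroring the guard in SubPrepare: succeed only if
// every tensor is unquantized. On a mismatch, TF_LITE_ENSURE_EQ reports the
// failure and returns kTfLiteError from this function.
TfLiteStatus EnsureAllUnquantized(TfLiteContext* context,
                                  const TfLiteTensor* const* tensors,
                                  int count) {
  for (int i = 0; i < count; ++i) {
    TF_LITE_ENSURE_EQ(context, tensors[i]->quantization.type,
                      kTfLiteNoQuantization);
  }
  return kTfLiteOk;
}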

tensorflow/lite/micro/kernels/sub_test.cc

Lines changed: 38 additions & 1 deletion
@@ -1,4 +1,4 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -105,6 +105,29 @@ void TestSubFloat(int* input1_dims_data, const float* input1_data,
                      ElementCount(*output_dims), activation);
 }
 
+#if !defined(XTENSA)
+void TestSubInt32(int* input1_dims_data, const int32_t* input1_data,
+                  int* input2_dims_data, const int32_t* input2_data,
+                  int* output_dims_data, const int32_t* expected_output,
+                  TfLiteFusedActivation activation, int32_t* output_data) {
+  TfLiteIntArray* input1_dims = IntArrayFromInts(input1_dims_data);
+  TfLiteIntArray* input2_dims = IntArrayFromInts(input2_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInts(output_dims_data);
+
+  constexpr int inputs_size = 2;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateTensor(input1_data, input1_dims),
+      CreateTensor(input2_data, input2_dims),
+      CreateTensor(output_data, output_dims),
+  };
+
+  ValidateSubGoldens(tensors, tensors_size, expected_output, output_data,
+                     ElementCount(*output_dims), activation);
+}
+#endif
+
 template <typename T>
 void TestSubQuantized(int* input1_dims_data, const float* input1_data,
                       T* input1_quantized, float input1_scale,
@@ -219,6 +242,20 @@ TF_LITE_MICRO_TEST(FloatSubWithScalarBroadcast) {
   }
 }
 
+#if !defined(XTENSA)
+TF_LITE_MICRO_TEST(Int32SubNoActivation) {
+  int inout_shape[] = {4, 1, 2, 2, 1};
+  const int32_t input1_values[] = {-2, 2147483646, -1, 1146622854};
+  const int32_t input2_values[] = {3, 1, -2147483647, -726978367};
+  const int32_t golden_values[] = {-5, 2147483645, 2147483646, 1873601221};
+  const int kOutputDimsCount = 4;
+  int32_t output_data[kOutputDimsCount];
+  tflite::testing::TestSubInt32(inout_shape, input1_values, inout_shape,
+                                input2_values, inout_shape, golden_values,
+                                kTfLiteActNone, output_data);
+}
+#endif
+
 TF_LITE_MICRO_TEST(QuantizedSubNoActivationInt8) {
   const float scales[] = {0.25, 0.5, 1.0};
   const int zero_points[] = {-10, 4, 13};
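A detail worth flagging in the new test: IntArrayFromInts reads the leading element of the array as the rank, so inout_shape[] = {4, 1, 2, 2, 1} describes a rank-4 tensor of shape 1x2x2x1, i.e. four elements, matching the four values in each input and golden array. A minimal sketch of that encoding (illustrative helper using the same layout as TfLiteIntArray, not a TFLM function):

#include <cstdio>

// TfLiteIntArray-style layout: element 0 holds the rank, elements 1..rank
// hold the dimensions.
int FlatSizeFromDims(const int* dims_with_rank) {
  int count = 1;
  for (int i = 1; i <= dims_with_rank[0]; ++i) {
    count *= dims_with_rank[i];
  }
  return count;
}

int main() {
  int inout_shape[] = {4, 1, 2, 2, 1};  // rank 4, shape 1x2x2x1
  printf("flat size = %d\n", FlatSizeFromDims(inout_shape));  // flat size = 4
  return 0;
}

Note also that the large test values are chosen to land just inside the int32_t range: for example, -1 - (-2147483647) = 2147483646, which fits without overflow.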
