[5763448][ONNX][Autocast] Fix Resize input type mismatch error (#757)

gcunhase · kevalmorabia97 · commit 41aaec5368aa · 2026-01-14T09:27:41.000+05:30
## What does this PR do? **Type of change:** Bug fix **Overview:** This PR fixes an input type mismatch in Resize layers when being converted to FP16. ## Usage ```python $ python -m modelopt.onnx.autocast --onnx_path=$MODEL_NAME.onnx ``` ## Testing Added unittest. ## Before your PR is "*Ready for review*"  - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes - **Did you write any new necessary tests?**: Yes - **Did you add or update any necessary documentation?**: No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No ## Additional Information This issue is also fixed by using the standalone type inference logic from #719.  ## Summary by CodeRabbit ## Release Notes * **Improvements** * Enhanced the graph sanitization process to automatically duplicate shared constants during optimization, ensuring improved model handling and consistency. * **Tests** * Added test coverage for mixed precision conversion of Conv-Resize model architectures. <sub>✏️ Tip: You can customize this high-level summary in your review settings.</sub>  --------- Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
diff --git a/modelopt/onnx/autocast/graphsanitizer.py b/modelopt/onnx/autocast/graphsanitizer.py
@@ -67,6 +67,7 @@ def sanitize(self) -> None:
         self.convert_opset()
         self.replace_layernorm_pattern()
         self.ensure_graph_name_exists()
+        self.duplicate_shared_constants()
         onnx_utils.name_onnx_nodes(self.model.graph)
         self.replace_custom_domain_nodes()
         self.sanitize_io_casts()
@@ -254,6 +255,12 @@ def ensure_graph_name_exists(self) -> None:
         if not self.model.graph.name:
             self.model.graph.name = "model"
 
+    def duplicate_shared_constants(self) -> None:
+        """Duplicate shared constant tensors in the model and log a warning if any were found.
+
+        The model returned by ``onnx_utils.duplicate_shared_constants`` replaces ``self.model``.
+        """
+        sanitized_model, found_shared = onnx_utils.duplicate_shared_constants(self.model)
+        self.model = sanitized_model
+        if not found_shared:
+            return
+        logger.warning("Shared constants were detected and duplicated accordingly.")
+
     def _match_layernorm_pattern(self, mean_node: onnx.NodeProto) -> dict | None:
         """Match the sequence of operations that constitute a LayerNorm.
 
diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py
@@ -1419,6 +1419,11 @@ def _sanitize_model(self):
         graph_sanitizer.sanitize()
         self.model = graph_sanitizer.model
 
+        # Rebuild value_info_map, initializer_map and node_to_init_map from the sanitized model
+        self.value_info_map, self.initializer_map, self.node_to_init_map = utils.setup_mappings(
+            self.model
+        )
+
     def _create_skip_inputs_mapping(self, tensor_block_dict: dict[str, dict[str, list[int]]] = {}):
         """Create mapping of op types to indices of inputs that should not be converted to low precision."""
         skip_inputs_map = {}
diff --git a/modelopt/onnx/quantization/fp8.py b/modelopt/onnx/quantization/fp8.py
@@ -102,7 +102,7 @@ def _convert(node: onnx.NodeProto):
             )
             zero_point = initializers[zero_point_idx]
             dtype = onnx.helper.tensor_dtype_to_np_dtype(zero_point.data_type)
-            vals = np.array(zero_point.int32_data, dtype=dtype).tobytes()
+            vals = np.array(zero_point.int32_data, dtype=dtype).tobytes() or zero_point.raw_data
 
             np_zero_point = onnx.helper.make_tensor(
                 zero_point_name, onnx.TensorProto.FLOAT8E4M3FN, zero_point.dims, vals, raw=True
diff --git a/tests/_test_utils/onnx/lib_test_models.py b/tests/_test_utils/onnx/lib_test_models.py
@@ -924,3 +924,88 @@ def build_conv_isinf_model(opset_version=13):
     onnx.checker.check_model(model_inferred)
 
     return model_inferred
+
+
+def build_conv_resize_model():
+    """Build a Conv -> Resize FP32 model whose Resize uses one constant for both roi and scales.
+
+    Returns:
+        The shape-inferred model (opset 13, IR version 10) after passing ``onnx.checker``.
+    """
+    # Graph interface: one float input and one float output
+    graph_inputs = [
+        helper.make_tensor_value_info("input_0", onnx.TensorProto.FLOAT, (1, 288, 32, 32))
+    ]
+    graph_outputs = [
+        helper.make_tensor_value_info("output_0", onnx.TensorProto.FLOAT, (1, 16, 64, 64))
+    ]
+
+    # Constant tensors referenced by the nodes below
+    weight_dims = (16, 288, 1, 1)
+    conv_weights = helper.make_tensor(
+        name="weights_1",
+        data_type=onnx.TensorProto.FLOAT,
+        dims=weight_dims,
+        vals=np.random.uniform(low=0.5, high=1.0, size=int(np.prod(weight_dims))),
+    )
+    empty_roi_scales = helper.make_tensor(
+        name="resize_roi_scales",
+        data_type=onnx.TensorProto.FLOAT,
+        dims=(0,),
+        vals=[],
+    )
+    resize_sizes = helper.make_tensor(
+        name="resize_sizes",
+        data_type=onnx.TensorProto.INT64,
+        dims=(4,),
+        vals=[1, 16, 64, 64],
+    )
+
+    # 1x1 convolution feeding the Resize node
+    conv_node = helper.make_node(
+        "Conv",
+        ["input_0", "weights_1"],
+        ["conv1_conv/Conv2D:0"],
+        name="conv1_conv/Conv2D",
+        dilations=[1, 1],
+        group=1,
+        kernel_shape=[1, 1],
+        pads=[0, 0, 0, 0],
+        strides=[1, 1],
+    )
+    # Note: resize_roi_scales is intentionally used for both roi and scales inputs
+    # to test the shared constant duplication fix (PR #757)
+    resize_node = helper.make_node(
+        "Resize",
+        ["conv1_conv/Conv2D:0", "resize_roi_scales", "resize_roi_scales", "resize_sizes"],
+        ["output_0"],
+        name="resize1_resize/Resize",
+        coordinate_transformation_mode="asymmetric",
+        cubic_coeff_a=-0.75,
+        mode="nearest",
+        nearest_mode="floor",
+    )
+
+    # Assemble the graph and wrap it in a model with a pinned opset and IR version
+    graph = helper.make_graph(
+        [conv_node, resize_node],
+        "conv_resize",
+        graph_inputs,
+        graph_outputs,
+        initializer=[conv_weights, empty_roi_scales, resize_sizes],
+    )
+    model = helper.make_model(graph)
+    model.opset_import[0].version = 13
+    model.ir_version = 10
+
+    # Run shape inference, then validate the inferred model before returning it
+    checked_model = onnx.shape_inference.infer_shapes(model)
+    onnx.checker.check_model(checked_model)
+    return checked_model
diff --git a/tests/unit/onnx/autocast/test_autocast.py b/tests/unit/onnx/autocast/test_autocast.py
@@ -20,7 +20,7 @@
 import onnx
 import onnx_graphsurgeon as gs
 import pytest
-from _test_utils.onnx.lib_test_models import build_conv_isinf_model
+from _test_utils.onnx.lib_test_models import build_conv_isinf_model, build_conv_resize_model
 
 import modelopt.onnx.autocast.utils as utils
 import modelopt.onnx.utils as onnx_utils
@@ -174,7 +174,7 @@ def test_conv_isinf_conversion(tmp_path, opset_version):
     output_onnx_path = onnx_path.replace(".onnx", ".fp16.onnx")
     onnx.save(converted_model, output_onnx_path)
 
-    # Load the output model and check QDQ node placements
+    # Load the output model
     graph = gs.import_onnx(converted_model)
 
     # Check that Conv is converted
@@ -190,6 +190,30 @@ def test_conv_isinf_conversion(tmp_path, opset_version):
     assert assert_input_precision(isinf_nodes, dtype=supported_dtype)
 
 
+def test_conv_resize_conversion(tmp_path):
+    """Verify FP16 conversion of a Conv-Resize model whose Resize shares a roi/scales constant."""
+    onnx_model = build_conv_resize_model()
+    onnx_path = os.path.join(tmp_path, "conv_resize_model.onnx")
+    onnx.save(onnx_model, onnx_path)
+
+    # Convert the model
+    converted_model = convert_to_mixed_precision(onnx_path=onnx_path)
+
+    # Save the converted model next to the input model in tmp_path
+    output_onnx_path = onnx_path.replace(".onnx", ".fp16.onnx")
+    onnx.save(converted_model, output_onnx_path)
+
+    # Load the output model
+    graph = gs.import_onnx(converted_model)
+
+    # Check that Resize is correctly converted:
+    # - Data and ROI inputs (indices 0 and 1) should be FP16
+    # - The remaining inputs (scales/sizes) should be kept in their original precisions
+    resize_node = next(n for n in graph.nodes if n.op == "Resize")
+    assert all(inp.dtype == np.float16 for inp in resize_node.inputs[0:2]), (
+        "Resize data and ROI inputs should be FP16"
+    )
+    # The builder declares scales as FLOAT and sizes as INT64; both must survive conversion
+    assert resize_node.inputs[2].dtype == np.float32, "Resize scales input should remain FP32"
+    assert resize_node.inputs[3].dtype == np.int64, "Resize sizes input should remain INT64"
+
+
 @pytest.mark.parametrize("target_opset", [13, 17, 19, 21])
 def test_opset_parameter(temp_model_path, target_opset):
     """Test that the opset parameter correctly sets the output model's opset version."""

Original file line number	Diff line number	Diff line change
`@@ -102,7 +102,7 @@ def _convert(node: onnx.NodeProto):`
`102`	`102`	`)`
`103`	`103`	`zero_point = initializers[zero_point_idx]`
`104`	`104`	`dtype = onnx.helper.tensor_dtype_to_np_dtype(zero_point.data_type)`
`105`		`- vals = np.array(zero_point.int32_data, dtype=dtype).tobytes()`
	`105`	`+ vals = np.array(zero_point.int32_data, dtype=dtype).tobytes() or zero_point.raw_data`
`106`	`106`
`107`	`107`	`np_zero_point = onnx.helper.make_tensor(`
`108`	`108`	`zero_point_name, onnx.TensorProto.FLOAT8E4M3FN, zero_point.dims, vals, raw=True`