Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modelconverter/hub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ Parameters specific to the `RVC4` conversion.
| `snpe_dlc_graph_prepare_args` | `list[str] \| None` | The arguments to pass to the `snpe-dlc-graph-prepare` tool. |
| `use_per_channel_quantization` | `bool` | Whether to use per-channel quantization. Defaults to `True`. |
| `use_per_row_quantization` | `bool` | Whether to use per-row quantization. Defaults to `False`. |
| `quantization_mode` | `Literal["INT8_STANDARD", "INT8_ACCURACY_FOCUSED", "INT8_INT16_MIXED", "FP16_STANDARD", "CUSTOM"]` | The pre-defined quantization mode to use. Defaults to `INT8_STANDARD`. |
| `htp_socs` | `list[str] \| None` | The list of HTP SoCs to use. |

**Hailo Parameters**
Expand Down
15 changes: 14 additions & 1 deletion modelconverter/hub/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from luxonis_ml.typing import Kwargs, PathType

from modelconverter.utils.types import PotDevice, Target
from modelconverter.utils.types import PotDevice, QuantizationMode, Target

from .__main__ import convert as cli_convert

Expand Down Expand Up @@ -125,6 +125,14 @@ def RVC4(
snpe_dlc_graph_prepare_args: list[str] | None = None,
use_per_channel_quantization: bool = True,
use_per_row_quantization: bool = False,
quantization_mode: QuantizationMode
| Literal[
"INT8_STANDARD",
"INT8_ACCURACY_FOCUSED",
"INT8_INT16_MIXED",
"FP16_STANDARD",
"CUSTOM",
] = QuantizationMode.INT8_STD,
htp_socs: list[
Literal[
"sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"
Expand All @@ -150,6 +158,8 @@ def RVC4(
Whether to use per-channel quantization.
use_per_row_quantization : bool, default False
Whether to use per-row quantization.
quantization_mode : QuantizationMode | Literal["INT8_STANDARD", "INT8_ACCURACY_FOCUSED", "INT8_INT16_MIXED", "FP16_STANDARD", "CUSTOM"], default QuantizationMode.INT8_STD
Pre-defined quantization modes for the model conversion.
htp_socs : list[str] | None, optional
List of HTP SoCs for the final DLC graph.
opts : dict[str, Any] | list[str] | None, optional
Expand All @@ -160,6 +170,8 @@ def RVC4(
online conversion.
"""
htp_socs = htp_socs or ["sm8550"]
if not isinstance(quantization_mode, QuantizationMode):
quantization_mode = QuantizationMode(quantization_mode)
return cli_convert(
Target.RVC4,
_combine_opts(
Expand All @@ -172,6 +184,7 @@ def RVC4(
"use_per_channel_quantization": use_per_channel_quantization
or [],
"use_per_row_quantization": use_per_row_quantization,
"quantization_mode": quantization_mode.value,
"htp_socs": htp_socs,
},
opts,
Expand Down
48 changes: 45 additions & 3 deletions modelconverter/packages/rvc4/exporter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import shutil
import subprocess
import time
Expand All @@ -22,6 +23,7 @@
DataType,
Encoding,
InputFileType,
QuantizationMode,
ResizeMethod,
Target,
)
Expand All @@ -37,7 +39,6 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
super().__init__(config=config, output_dir=output_dir)

rvc4_cfg = config.rvc4
self.compress_to_fp16 = rvc4_cfg.compress_to_fp16
self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args
self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args
self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args
Expand All @@ -46,6 +47,14 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
)
self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization
self.optimization_level = rvc4_cfg.optimization_level
self.quantization_mode = rvc4_cfg.quantization_mode
if self.quantization_mode != QuantizationMode.CUSTOM:
self.snpe_onnx_to_dlc = []
self.snpe_dlc_quant = []
self.snpe_dlc_graph_prepare = []
logger.warning(
f"Overriding user-provided SNPE arguments. Using pre-defined arguments for quantization mode {self.quantization_mode.value}."
)
self.keep_raw_images = rvc4_cfg.keep_raw_images
if "--htp_socs" in self.snpe_dlc_graph_prepare:
i = self.snpe_dlc_graph_prepare.index("--htp_socs")
Expand Down Expand Up @@ -114,7 +123,7 @@ def export(self) -> Path:
args, ["--optimization_level", str(self.optimization_level)]
)
self._add_args(args, ["--htp_socs", ",".join(self.htp_socs)])
if self.compress_to_fp16:
if self.quantization_mode == QuantizationMode.FP16_STD:
self._add_args(args, ["--use_float_io"])
self._subprocess_run(
["snpe-dlc-graph-prepare", *args], meta_name="graph_prepare"
Expand Down Expand Up @@ -157,6 +166,15 @@ def calibrate(self, dlc_path: Path) -> Path:
if self.use_per_row_quantization:
args.append("--use_per_row_quantization")

if self.quantization_mode == QuantizationMode.INT8_ACC:
self._add_args(args, ["--param_quantizer", "enhanced"])
self._add_args(args, ["--act_quantizer", "enhanced"])
elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
self._add_args(args, ["--param_quantizer", "enhanced"])
self._add_args(args, ["--act_quantizer", "enhanced"])
self._add_args(args, ["--act_bitwidth", "16"])
args.append("--override_params")

start_time = time.time()
self._subprocess_run(
["snpe-dlc-quant", *args], meta_name="quantization_cmd"
Expand Down Expand Up @@ -241,6 +259,21 @@ class Entry(NamedTuple):
f.write(entry_str + "\n")
return self.input_list_path

def generate_io_encodings(self) -> Path:
    """Write a quantization-overrides JSON file pinning all model inputs
    and outputs to 8-bit integer activation encodings.

    The file is written to ``self.intermediate_outputs_dir`` as
    ``io_encodings.json``. It is intended to be passed to
    ``snpe-onnx-to-dlc`` via ``--quantization_overrides`` (used by the
    ``INT8_INT16_MIXED`` quantization mode to keep I/O tensors at 8 bits).

    Returns
    -------
    Path
        Path to the generated ``io_encodings.json`` file.
    """
    encodings_dict: dict[str, dict[str, list[dict[str, object]]]] = {
        "activation_encodings": {},
        "param_encodings": {},
    }
    # Best-effort: warn but still emit the (possibly empty) overrides file
    # so the caller's command line stays valid.
    if not (self.inputs and self.outputs):
        logger.warning(
            "Cannot generate I/O encodings as inputs or outputs are not defined. The resulting DLC may not be compatible with DAI."
        )
    # Pin every known input and output tensor to int8 activations;
    # param_encodings is intentionally left empty.
    for name in [*self.inputs, *self.outputs]:
        encodings_dict["activation_encodings"][name] = [
            {"bitwidth": 8, "dtype": "int"}
        ]
    encodings_path = self.intermediate_outputs_dir / "io_encodings.json"
    with open(encodings_path, "w") as encodings_file:
        json.dump(encodings_dict, encodings_file, indent=4)
    return encodings_path

def onnx_to_dlc(self) -> Path:
logger.info("Exporting for RVC4")
args = self.snpe_onnx_to_dlc
Expand Down Expand Up @@ -293,8 +326,17 @@ def onnx_to_dlc(self) -> Path:
"Proceeding wihtout specifying layout."
)

if self.compress_to_fp16:
if self.quantization_mode == QuantizationMode.FP16_STD:
self._add_args(args, ["--float_bitwidth", "16"])
elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
io_encodings_file = self.generate_io_encodings()
self._add_args(
args,
[
"--quantization_overrides",
f"{io_encodings_file}",
],
)

if self.is_tflite:
command = "snpe-tflite-to-dlc"
Expand Down
5 changes: 3 additions & 2 deletions modelconverter/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Encoding,
InputFileType,
PotDevice,
QuantizationMode,
ResizeMethod,
Target,
)
Expand Down Expand Up @@ -264,21 +265,21 @@ class RVC3Config(BlobBaseConfig):


class RVC4Config(TargetConfig):
compress_to_fp16: bool = False
snpe_onnx_to_dlc_args: list[str] = []
snpe_dlc_quant_args: list[str] = []
snpe_dlc_graph_prepare_args: list[str] = []
keep_raw_images: bool = False
use_per_channel_quantization: bool = True
use_per_row_quantization: bool = False
optimization_level: Literal[1, 2, 3] = 2
quantization_mode: QuantizationMode = QuantizationMode.INT8_STD
htp_socs: list[
Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"]
] = ["sm8550"]

@model_validator(mode="after")
def _validate_fp16(self) -> Self:
if not self.compress_to_fp16:
if self.quantization_mode != QuantizationMode.FP16_STD:
return self
self.disable_calibration = True
if "qcs8550" not in self.htp_socs:
Expand Down
49 changes: 30 additions & 19 deletions modelconverter/utils/nn_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from modelconverter.utils.constants import MISC_DIR
from modelconverter.utils.layout import guess_new_layout, make_default_layout
from modelconverter.utils.metadata import Metadata, get_metadata
from modelconverter.utils.types import DataType, Encoding, Target
from modelconverter.utils.types import (
DataType,
Encoding,
QuantizationMode,
Target,
)


def get_archive_input(cfg: NNArchiveConfig, name: str) -> NNArchiveInput:
Expand Down Expand Up @@ -231,21 +236,27 @@ def modelconverter_config_to_nn(
target_cfg = cfg.get_target_config(target)

# TODO: This might be more complicated for Hailo

onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
fb16 = any(
a == "--float_bitwidth" and str(b) == "16"
for a, b in pairwise(onnx_args)
) or any(
isinstance(x, str)
and x.startswith("--float_bitwidth=")
and x.split("=", 1)[1] == "16"
for x in onnx_args
)
compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
fb16 and "--use_float_io" in prep_args
)
quantization_mode = getattr(target_cfg, "quantization_mode", None)
if (
quantization_mode is None
or quantization_mode == QuantizationMode.CUSTOM
):
onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
fb16 = any(
a == "--float_bitwidth" and str(b) == "16"
for a, b in pairwise(onnx_args)
) or any(
isinstance(x, str)
and x.startswith("--float_bitwidth=")
and x.split("=", 1)[1] == "16"
for x in onnx_args
)
compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
fb16 and "--use_float_io" in prep_args
)
else:
compress_to_fp16 = quantization_mode == QuantizationMode.FP16_STD
disable_calibration = target_cfg.disable_calibration

match target, compress_to_fp16, disable_calibration:
Expand Down Expand Up @@ -290,10 +301,10 @@ def modelconverter_config_to_nn(
layout = make_default_layout(new_shape)
dai_type = inp.encoding.to.value
if inp.data_type == DataType.FLOAT16:
type = "F16F16F16"
channel_format = "F16F16F16"
else:
type = "888"
dai_type += type
channel_format = "888"
dai_type += channel_format
dai_type += "i" if layout == "NHWC" else "p"

dtype = _get_io_dtype(
Expand Down
9 changes: 9 additions & 0 deletions modelconverter/utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,15 @@ class Target(Enum):
RVC4 = "rvc4"


class QuantizationMode(Enum):
    """Pre-defined quantization modes for RVC4 model conversion.

    The enum *values* are the user-facing strings accepted in configs and
    the hub API (e.g. ``"INT8_STANDARD"``); the member names are the
    short aliases used in code (e.g. ``QuantizationMode.INT8_STD``).
    """

    INT8_STD = "INT8_STANDARD"
    INT8_ACC = "INT8_ACCURACY_FOCUSED"
    INT8_16_MIX = "INT8_INT16_MIXED"
    # Added for future use; not yet listed in the public Literal options
    # or the README parameter table.
    INT8_FP16_MIX = "INT8_FP16_MIXED"
    FP16_STD = "FP16_STANDARD"
    CUSTOM = "CUSTOM"


class InputFileType(Enum):
ONNX = "ONNX"
IR = "IR"
Expand Down
10 changes: 3 additions & 7 deletions shared_with_container/configs/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,8 @@ stages:
# List of platforms to pre-compute the DLC graph for.
htp_socs: ["sm8550"]

# Configures conversion to float16 precision. This will
# disable the calibration, add the `--float_bitwidth 16`
# flag to `snpe-onnx-to-dlc`, the `--use_float_io` flag
# to `snpe-dlc-graph-prepare`, and the `qcs8550` platform
# to the `htp_socs` list.
compress_to_fp16: False

# Optimization level for the DLC graph preparation. The available levels are: 1, 2, and 3. Higher optimization levels incur longer offline prepare times but yield a more optimal graph and hence faster execution times for most graphs.
optimization_level: 2

# Pre-defined quantization modes for the RVC4 exporter. Pre-defined modes (except CUSTOM) will override any user-provided SNPE arguments via `snpe_onnx_to_dlc_args`, `snpe_dlc_quant_args`, and `snpe_dlc_graph_prepare_args`. The available quantization modes are: INT8_STANDARD, INT8_ACCURACY_FOCUSED, INT8_INT16_MIXED, FP16_STANDARD, and CUSTOM.
quantization_mode: INT8_STANDARD
3 changes: 2 additions & 1 deletion tests/test_utils/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Encoding,
InputFileType,
PotDevice,
QuantizationMode,
ResizeMethod,
Target,
)
Expand Down Expand Up @@ -65,7 +66,7 @@
"disable_calibration": False,
"use_per_channel_quantization": True,
"use_per_row_quantization": False,
"compress_to_fp16": False,
"quantization_mode": QuantizationMode.INT8_STD,
"optimization_level": 2,
},
"hailo": {
Expand Down
Loading