`.github/workflows/modelconverter_test.yaml` (1 addition, 1 deletion)

```diff
@@ -43,7 +43,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Docker
-        uses: crazy-max/ghaction-setup-docker@v3
+        uses: crazy-max/ghaction-setup-docker@v4
 
       - name: Set up Python
         uses: actions/setup-python@v5
```
`README.md` (13 additions)

```diff
@@ -361,6 +361,19 @@ Below is a table of common command-line options available when using the `modelc
 > [!NOTE]
 > This table is not exhaustive. For more detailed information about available options, run `modelconverter convert --help` in your command line interface. You can also check all the `[ config overrides ]` available at [defaults.yaml](shared_with_container/configs/defaults.yaml).
 
+##### RVC4 Quantization Mode Option
+
+The `rvc4.quantization_mode` CLI option lets you choose between pre-defined quantization modes for RVC4 conversions. The available modes are:
+
+- `INT8_STANDARD`: Standard INT8 quantization **with calibration** (default), for optimal performance (FPS) and model size.
+- `INT8_ACCURACY_FOCUSED`: INT8 quantization **with calibration**. This mode uses more advanced quantization techniques that, depending on the model, may improve accuracy without reducing performance or increasing model size.
+- `INT8_INT16_MIXED`: Mixed INT8 and INT16 quantization **with calibration**. This mode uses 8-bit weights and 16-bit activations across all layers for improved numeric stability and accuracy, at the cost of reduced performance (FPS) and increased model size.
+- `FP16_STANDARD`: FP16 quantization **without calibration**, for models that require higher accuracy and numeric stability, at the cost of reduced performance (FPS) and increased model size.
+- `CUSTOM`: Custom quantization mode, where the user can specify more advanced options in the configuration file or via command-line arguments.
+
+> [!IMPORTANT]
+> When `rvc4.quantization_mode` is set to anything other than `CUSTOM`, the default settings for that mode override any custom settings provided in the configuration file or via command-line arguments (the `rvc4.snpe_onnx_to_dlc_args`, `rvc4.snpe_dlc_quant_args`, and `rvc4.snpe_dlc_graph_prepare_args` options).
+
 #### Handling Large ONNX Files (Exceeding 2GB)
 
 When working with ONNX models that exceed 2GB in size, the model data must be stored using ONNX's external data mechanism. This separates the model structure from the large weight data.
```
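For illustration of the new `rvc4.quantization_mode` option, a minimal sketch of a user config selecting one of the modes, mirroring the option layout of `defaults.yaml` (the stage name and model path are placeholders, and the exact config schema is an assumption here):

```yaml
stages:
  my_model:
    input_model: shared_with_container/models/my_model.onnx
    rvc4:
      quantization_mode: INT8_INT16_MIXED
```

The same value can also be passed as a CLI override via the `rvc4.quantization_mode` option described in the README section above.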
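The README context above also mentions ONNX's external-data mechanism for models exceeding 2GB. As a minimal sketch using the standard `onnx` Python API (file names are illustrative):

```python
import onnx

# Re-save a model so its weight tensors are written to a side-car file,
# keeping the .onnx protobuf itself below the 2GB protobuf limit.
model = onnx.load("exported.onnx")
onnx.save_model(
    model,
    "model.onnx",
    save_as_external_data=True,
    all_tensors_to_one_file=True,  # one side-car file for all weights
    location="model.onnx.data",    # stored relative to the model file
)
```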
`modelconverter/packages/rvc4/exporter.py` (45 additions, 3 deletions)

```diff
@@ -1,3 +1,4 @@
+import json
 import shutil
 import subprocess
 import time
@@ -22,6 +23,7 @@
     DataType,
     Encoding,
     InputFileType,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -37,7 +39,6 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
         super().__init__(config=config, output_dir=output_dir)
 
         rvc4_cfg = config.rvc4
-        self.compress_to_fp16 = rvc4_cfg.compress_to_fp16
         self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args
         self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args
         self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args
@@ -46,6 +47,14 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
         )
         self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization
         self.optimization_level = rvc4_cfg.optimization_level
+        self.quantization_mode = rvc4_cfg.quantization_mode
+        if self.quantization_mode != QuantizationMode.CUSTOM:
+            self.snpe_onnx_to_dlc = []
+            self.snpe_dlc_quant = []
+            self.snpe_dlc_graph_prepare = []
+            logger.warning(
+                f"Using pre-defined arguments for quantization mode {self.quantization_mode.value}, which will override user-provided SNPE arguments. If you need full control of SNPE arguments, set `rvc4.quantization_mode: CUSTOM` in the config or CLI."
+            )
         self.keep_raw_images = rvc4_cfg.keep_raw_images
         if "--htp_socs" in self.snpe_dlc_graph_prepare:
             i = self.snpe_dlc_graph_prepare.index("--htp_socs")
@@ -114,7 +123,7 @@ def export(self) -> Path:
             args, ["--optimization_level", str(self.optimization_level)]
         )
         self._add_args(args, ["--htp_socs", ",".join(self.htp_socs)])
-        if self.compress_to_fp16:
+        if self.quantization_mode == QuantizationMode.FP16_STD:
             self._add_args(args, ["--use_float_io"])
         self._subprocess_run(
             ["snpe-dlc-graph-prepare", *args], meta_name="graph_prepare"
@@ -157,6 +166,15 @@ def calibrate(self, dlc_path: Path) -> Path:
         if self.use_per_row_quantization:
             args.append("--use_per_row_quantization")
 
+        if self.quantization_mode == QuantizationMode.INT8_ACC:
+            self._add_args(args, ["--param_quantizer", "enhanced"])
+            self._add_args(args, ["--act_quantizer", "enhanced"])
+        elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
+            self._add_args(args, ["--param_quantizer", "enhanced"])
+            self._add_args(args, ["--act_quantizer", "enhanced"])
+            self._add_args(args, ["--act_bitwidth", "16"])
+            args.append("--override_params")
+
         start_time = time.time()
         self._subprocess_run(
             ["snpe-dlc-quant", *args], meta_name="quantization_cmd"
@@ -241,6 +259,21 @@ class Entry(NamedTuple):
                 f.write(entry_str + "\n")
         return self.input_list_path
 
+    def generate_io_encodings(self) -> Path:
+        encodings_dict = {"activation_encodings": {}, "param_encodings": {}}
+        if not (list(self.inputs.keys()) and list(self.outputs.keys())):
+            logger.warning(
+                "Cannot generate I/O encodings as inputs or outputs are not defined. The resulting DLC may not be compatible with DAI."
+            )
+        for name in list(self.inputs.keys()) + list(self.outputs.keys()):
+            encodings_dict["activation_encodings"][name] = [
+                {"bitwidth": 8, "dtype": "int"}
+            ]
+        encodings_path = self.intermediate_outputs_dir / "io_encodings.json"
+        with open(encodings_path, "w") as encodings_file:
+            json.dump(encodings_dict, encodings_file, indent=4)
+        return encodings_path
+
     def onnx_to_dlc(self) -> Path:
         logger.info("Exporting for RVC4")
         args = self.snpe_onnx_to_dlc
@@ -293,8 +326,17 @@ def onnx_to_dlc(self) -> Path:
                 "Proceeding without specifying layout."
             )
 
-        if self.compress_to_fp16:
+        if self.quantization_mode == QuantizationMode.FP16_STD:
             self._add_args(args, ["--float_bitwidth", "16"])
+        elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
+            io_encodings_file = self.generate_io_encodings()
+            self._add_args(
+                args,
+                [
+                    "--quantization_overrides",
+                    f"{io_encodings_file}",
+                ],
+            )
 
         if self.is_tflite:
             command = "snpe-tflite-to-dlc"
```
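For reference, a sketch of the `io_encodings.json` file that the new `generate_io_encodings` helper would emit for a model with one input named `images` and one output named `scores` (the tensor names are hypothetical):

```json
{
    "activation_encodings": {
        "images": [{"bitwidth": 8, "dtype": "int"}],
        "scores": [{"bitwidth": 8, "dtype": "int"}]
    },
    "param_encodings": {}
}
```

The `--quantization_overrides` argument added in `onnx_to_dlc` points SNPE at this file, so the model's input and output tensors stay 8-bit for DAI compatibility while `--act_bitwidth 16` raises the remaining activations to 16 bits.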
`modelconverter/utils/config.py` (3 additions, 4 deletions)

```diff
@@ -27,6 +27,7 @@
     Encoding,
     InputFileType,
     PotDevice,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -264,25 +265,23 @@ class RVC3Config(BlobBaseConfig):
 
 
 class RVC4Config(TargetConfig):
-    compress_to_fp16: bool = False
     snpe_onnx_to_dlc_args: list[str] = []
     snpe_dlc_quant_args: list[str] = []
     snpe_dlc_graph_prepare_args: list[str] = []
     keep_raw_images: bool = False
     use_per_channel_quantization: bool = True
     use_per_row_quantization: bool = False
     optimization_level: Literal[1, 2, 3] = 2
+    quantization_mode: QuantizationMode = QuantizationMode.INT8_STD
     htp_socs: list[
         Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"]
     ] = ["sm8550"]
 
     @model_validator(mode="after")
     def _validate_fp16(self) -> Self:
-        if not self.compress_to_fp16:
+        if self.quantization_mode != QuantizationMode.FP16_STD:
             return self
         self.disable_calibration = True
         if "qcs8550" not in self.htp_socs:
             self.htp_socs.append("qcs8550")
         return self
```
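A quick sketch of the reworked validator's effect, assuming `RVC4Config` can be instantiated directly with its defaults:

```python
from modelconverter.utils.config import RVC4Config
from modelconverter.utils.types import QuantizationMode

# _validate_fp16 runs after construction: FP16 mode disables calibration
# and makes sure the qcs8550 platform is included in htp_socs.
cfg = RVC4Config(quantization_mode=QuantizationMode.FP16_STD)
assert cfg.disable_calibration is True
assert "qcs8550" in cfg.htp_socs

# Every other mode returns early and leaves both fields untouched.
assert RVC4Config().disable_calibration is False
```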
`modelconverter/utils/nn_archive.py` (30 additions, 19 deletions)

```diff
@@ -20,7 +20,12 @@
 from modelconverter.utils.constants import MISC_DIR
 from modelconverter.utils.layout import guess_new_layout, make_default_layout
 from modelconverter.utils.metadata import Metadata, get_metadata
-from modelconverter.utils.types import DataType, Encoding, Target
+from modelconverter.utils.types import (
+    DataType,
+    Encoding,
+    QuantizationMode,
+    Target,
+)
 
 
 def get_archive_input(cfg: NNArchiveConfig, name: str) -> NNArchiveInput:
@@ -231,21 +236,27 @@ def modelconverter_config_to_nn(
     target_cfg = cfg.get_target_config(target)
 
     # TODO: This might be more complicated for Hailo
 
-    onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
-    prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
-    fb16 = any(
-        a == "--float_bitwidth" and str(b) == "16"
-        for a, b in pairwise(onnx_args)
-    ) or any(
-        isinstance(x, str)
-        and x.startswith("--float_bitwidth=")
-        and x.split("=", 1)[1] == "16"
-        for x in onnx_args
-    )
-    compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
-        fb16 and "--use_float_io" in prep_args
-    )
+    quantization_mode = getattr(target_cfg, "quantization_mode", None)
+    if (
+        quantization_mode is None
+        or quantization_mode == QuantizationMode.CUSTOM
+    ):
+        onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
+        prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
+        fb16 = any(
+            a == "--float_bitwidth" and str(b) == "16"
+            for a, b in pairwise(onnx_args)
+        ) or any(
+            isinstance(x, str)
+            and x.startswith("--float_bitwidth=")
+            and x.split("=", 1)[1] == "16"
+            for x in onnx_args
+        )
+        compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
+            fb16 and "--use_float_io" in prep_args
+        )
+    else:
+        compress_to_fp16 = quantization_mode == QuantizationMode.FP16_STD
     disable_calibration = target_cfg.disable_calibration
 
     match target, compress_to_fp16, disable_calibration:
@@ -290,10 +301,10 @@ def modelconverter_config_to_nn(
             layout = make_default_layout(new_shape)
             dai_type = inp.encoding.to.value
             if inp.data_type == DataType.FLOAT16:
-                type = "F16F16F16"
+                channel_format = "F16F16F16"
             else:
-                type = "888"
-            dai_type += type
+                channel_format = "888"
+            dai_type += channel_format
             dai_type += "i" if layout == "NHWC" else "p"
 
             dtype = _get_io_dtype(
```
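To make the `dai_type` construction in the last hunk concrete, a small sketch with hypothetical values (in the real code these come from the input's encoding, data type, and layout):

```python
encoding = "RGB"   # stands in for inp.encoding.to.value
is_fp16 = False    # stands in for inp.data_type == DataType.FLOAT16
layout = "NHWC"

channel_format = "F16F16F16" if is_fp16 else "888"
dai_type = encoding + channel_format + ("i" if layout == "NHWC" else "p")
print(dai_type)  # RGB888i
```

Renaming the local variable from `type` to `channel_format` also stops it from shadowing Python's built-in `type`.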
`modelconverter/utils/types.py` (8 additions)

```diff
@@ -229,6 +229,14 @@ class Target(Enum):
     RVC4 = "rvc4"
 
 
+class QuantizationMode(Enum):
+    INT8_STD = "INT8_STANDARD"
+    INT8_ACC = "INT8_ACCURACY_FOCUSED"
+    INT8_16_MIX = "INT8_INT16_MIXED"
+    FP16_STD = "FP16_STANDARD"
+    CUSTOM = "CUSTOM"
+
+
 class InputFileType(Enum):
     ONNX = "ONNX"
     IR = "IR"
```
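Because the enum values are the user-facing names documented in the README, config and CLI values can be parsed straight into members; for example:

```python
from modelconverter.utils.types import QuantizationMode

mode = QuantizationMode("INT8_INT16_MIXED")  # value as written in a config
assert mode is QuantizationMode.INT8_16_MIX
assert mode.value == "INT8_INT16_MIXED"
```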
`shared_with_container/configs/defaults.yaml` (6 additions, 7 deletions)

```diff
@@ -198,6 +198,9 @@ stages:
     # List of additional arguments to pass to SNPE snpe-dlc-graph-prepare.
     snpe_dlc_graph_prepare_args: []
 
+    # Disables calibration/quantization.
+    disable_calibration: false
+
     # Whether to include the raw images in the intermediate outputs.
     # Warning: the raw images can get very large.
     keep_raw_images: False
@@ -213,12 +216,8 @@
     # List of platforms to pre-compute the DLC graph for.
     htp_socs: ["sm8550"]
 
-    # Configures conversion to float16 precision. This will
-    # disable the calibration, add the `--float_bitwidth 16`
-    # flag to `snpe-onnx-to-dlc`, the `--use_float_io` flag
-    # to `snpe-dlc-graph-prepare`, and the `qcs8550` platform
-    # to the `htp_socs` list.
-    compress_to_fp16: False
-
     # Optimization level for the DLC graph preparation. The available levels are: 1, 2, and 3. Higher optimization levels incur longer offline prepare time but yield a more optimal graph and hence faster execution time for most graphs.
     optimization_level: 2
 
+    # Pre-defined quantization mode for the RVC4 exporter. Every mode except CUSTOM overrides any user-provided SNPE arguments passed via `snpe_onnx_to_dlc_args`, `snpe_dlc_quant_args`, and `snpe_dlc_graph_prepare_args`. The available modes are: INT8_STANDARD, INT8_ACCURACY_FOCUSED, INT8_INT16_MIXED, FP16_STANDARD, and CUSTOM.
+    quantization_mode: INT8_STANDARD
```
`tests/test_utils/test_config.py` (2 additions, 1 deletion)

```diff
@@ -25,6 +25,7 @@
     Encoding,
     InputFileType,
     PotDevice,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -65,7 +66,7 @@
         "disable_calibration": False,
         "use_per_channel_quantization": True,
         "use_per_row_quantization": False,
-        "compress_to_fp16": False,
+        "quantization_mode": QuantizationMode.INT8_STD,
         "optimization_level": 2,
     },
     "hailo": {
```