`.github/workflows/modelconverter_test.yaml` (1 addition, 1 deletion)

```diff
@@ -43,7 +43,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Set up Docker
-        uses: crazy-max/ghaction-setup-docker@v3
+        uses: crazy-max/ghaction-setup-docker@v4
 
       - name: Set up Python
         uses: actions/setup-python@v5
```
`README.md` (13 additions)

```diff
@@ -361,6 +361,19 @@ Below is a table of common command-line options available when using the `modelc
 > [!NOTE]
 > This table is not exhaustive. For more detailed information about available options, run `modelconverter convert --help` in your command line interface. You can also check all the `[ config overrides ]` available at [defaults.yaml](shared_with_container/configs/defaults.yaml).
 
+##### RVC4 Quantization Mode Option
+
+The `rvc4.quantization_mode` CLI option lets you choose between pre-defined quantization modes for RVC4 conversions. The available modes are:
+
+- `INT8_STANDARD`: Standard INT8 quantization **with calibration** (default), for optimal performance (FPS) and model size.
+- `INT8_ACCURACY_FOCUSED`: INT8 quantization **with calibration**. This mode uses more advanced quantization techniques that, depending on the model, may improve accuracy without reducing performance or increasing model size.
+- `INT8_INT16_MIXED`: Mixed INT8 and INT16 quantization **with calibration**. This mode uses 8-bit weights and 16-bit activations across all layers for improved numeric stability and accuracy, at the cost of reduced performance (FPS) and increased model size.
+- `FP16_STANDARD`: FP16 quantization **without calibration**, for models that require higher accuracy and numeric stability, at the cost of reduced performance (FPS) and increased model size.
+- `CUSTOM`: Custom quantization mode, where the user can specify more advanced options in the configuration file or via command-line arguments.
+
+> [!IMPORTANT]
+> When `rvc4.quantization_mode` is set to anything other than `CUSTOM`, the default settings for that mode override any custom settings provided in the configuration file or via command-line arguments (the `rvc4.snpe_onnx_to_dlc_args`, `rvc4.snpe_dlc_quant_args`, and `rvc4.snpe_dlc_graph_prepare_args` options).
+
 #### Handling Large ONNX Files (Exceeding 2GB)
 
 When working with ONNX models that exceed 2GB in size, the model data must be stored using ONNX's external data mechanism. This separates the model structure from the large weight data.
```
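For illustration of the new `rvc4.quantization_mode` option, a minimal sketch of a user config selecting one of the modes, mirroring the option layout of `defaults.yaml` (the stage name and model path are placeholders, and the exact config schema is an assumption here):

```yaml
stages:
  my_model:
    input_model: shared_with_container/models/my_model.onnx
    rvc4:
      quantization_mode: INT8_INT16_MIXED
```

The same value can also be passed as a CLI override via the `rvc4.quantization_mode` option described in the README section above.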
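The README context above also mentions ONNX's external-data mechanism for models exceeding 2GB. As a minimal sketch using the standard `onnx` Python API (file names are illustrative):

```python
import onnx

# Re-save a model so its weight tensors are written to a side-car file,
# keeping the .onnx protobuf itself below the 2GB protobuf limit.
model = onnx.load("exported.onnx")
onnx.save_model(
    model,
    "model.onnx",
    save_as_external_data=True,
    all_tensors_to_one_file=True,  # one side-car file for all weights
    location="model.onnx.data",    # stored relative to the model file
)
```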
`modelconverter/packages/rvc4/exporter.py` (45 additions, 3 deletions)

```diff
@@ -1,3 +1,4 @@
+import json
 import shutil
 import subprocess
 import time
@@ -22,6 +23,7 @@
     DataType,
     Encoding,
     InputFileType,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -37,7 +39,6 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
         super().__init__(config=config, output_dir=output_dir)
 
         rvc4_cfg = config.rvc4
-        self.compress_to_fp16 = rvc4_cfg.compress_to_fp16
         self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args
         self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args
         self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args
@@ -46,6 +47,14 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
         )
         self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization
         self.optimization_level = rvc4_cfg.optimization_level
+        self.quantization_mode = rvc4_cfg.quantization_mode
+        if self.quantization_mode != QuantizationMode.CUSTOM:
+            self.snpe_onnx_to_dlc = []
+            self.snpe_dlc_quant = []
+            self.snpe_dlc_graph_prepare = []
+            logger.warning(
+                f"Using pre-defined arguments for quantization mode {self.quantization_mode.value}, which will override user-provided SNPE arguments. If you need full control of SNPE arguments, set `rvc4.quantization_mode: CUSTOM` in the config or CLI."
+            )
         self.keep_raw_images = rvc4_cfg.keep_raw_images
         if "--htp_socs" in self.snpe_dlc_graph_prepare:
             i = self.snpe_dlc_graph_prepare.index("--htp_socs")
@@ -114,7 +123,7 @@ def export(self) -> Path:
             args, ["--optimization_level", str(self.optimization_level)]
         )
         self._add_args(args, ["--htp_socs", ",".join(self.htp_socs)])
-        if self.compress_to_fp16:
+        if self.quantization_mode == QuantizationMode.FP16_STD:
             self._add_args(args, ["--use_float_io"])
         self._subprocess_run(
             ["snpe-dlc-graph-prepare", *args], meta_name="graph_prepare"
@@ -157,6 +166,15 @@ def calibrate(self, dlc_path: Path) -> Path:
         if self.use_per_row_quantization:
             args.append("--use_per_row_quantization")
 
+        if self.quantization_mode == QuantizationMode.INT8_ACC:
+            self._add_args(args, ["--param_quantizer", "enhanced"])
+            self._add_args(args, ["--act_quantizer", "enhanced"])
+        elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
+            self._add_args(args, ["--param_quantizer", "enhanced"])
+            self._add_args(args, ["--act_quantizer", "enhanced"])
+            self._add_args(args, ["--act_bitwidth", "16"])
+            args.append("--override_params")
+
         start_time = time.time()
         self._subprocess_run(
             ["snpe-dlc-quant", *args], meta_name="quantization_cmd"
@@ -241,6 +259,21 @@ class Entry(NamedTuple):
                 f.write(entry_str + "\n")
         return self.input_list_path
 
+    def generate_io_encodings(self) -> Path:
+        encodings_dict = {"activation_encodings": {}, "param_encodings": {}}
+        if not (list(self.inputs.keys()) and list(self.outputs.keys())):
+            logger.warning(
+                "Cannot generate I/O encodings as inputs or outputs are not defined. The resulting DLC may not be compatible with DAI."
+            )
+        for name in list(self.inputs.keys()) + list(self.outputs.keys()):
+            encodings_dict["activation_encodings"][name] = [
+                {"bitwidth": 8, "dtype": "int"}
+            ]
+        encodings_path = self.intermediate_outputs_dir / "io_encodings.json"
+        with open(encodings_path, "w") as encodings_file:
+            json.dump(encodings_dict, encodings_file, indent=4)
+        return encodings_path
+
     def onnx_to_dlc(self) -> Path:
         logger.info("Exporting for RVC4")
         args = self.snpe_onnx_to_dlc
@@ -293,8 +326,17 @@ def onnx_to_dlc(self) -> Path:
                 "Proceeding without specifying layout."
             )
 
-        if self.compress_to_fp16:
+        if self.quantization_mode == QuantizationMode.FP16_STD:
             self._add_args(args, ["--float_bitwidth", "16"])
+        elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
+            io_encodings_file = self.generate_io_encodings()
+            self._add_args(
+                args,
+                [
+                    "--quantization_overrides",
+                    f"{io_encodings_file}",
+                ],
+            )
 
         if self.is_tflite:
             command = "snpe-tflite-to-dlc"
```
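For reference, a sketch of the `io_encodings.json` file that the new `generate_io_encodings` helper would emit for a model with one input named `images` and one output named `scores` (the tensor names are hypothetical):

```json
{
    "activation_encodings": {
        "images": [{"bitwidth": 8, "dtype": "int"}],
        "scores": [{"bitwidth": 8, "dtype": "int"}]
    },
    "param_encodings": {}
}
```

The `--quantization_overrides` argument added in `onnx_to_dlc` points SNPE at this file, so the model's input and output tensors stay 8-bit for DAI compatibility while `--act_bitwidth 16` raises the remaining activations to 16 bits.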
`modelconverter/utils/config.py` (3 additions, 4 deletions)

```diff
@@ -27,6 +27,7 @@
     Encoding,
     InputFileType,
     PotDevice,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -264,25 +265,23 @@ class RVC3Config(BlobBaseConfig):
 
 
 class RVC4Config(TargetConfig):
-    compress_to_fp16: bool = False
     snpe_onnx_to_dlc_args: list[str] = []
     snpe_dlc_quant_args: list[str] = []
     snpe_dlc_graph_prepare_args: list[str] = []
     keep_raw_images: bool = False
     use_per_channel_quantization: bool = True
     use_per_row_quantization: bool = False
     optimization_level: Literal[1, 2, 3] = 2
+    quantization_mode: QuantizationMode = QuantizationMode.INT8_STD
     htp_socs: list[
         Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"]
     ] = ["sm8550"]
 
     @model_validator(mode="after")
     def _validate_fp16(self) -> Self:
-        if not self.compress_to_fp16:
+        if self.quantization_mode != QuantizationMode.FP16_STD:
             return self
         self.disable_calibration = True
         if "qcs8550" not in self.htp_socs:
             self.htp_socs.append("qcs8550")
         return self
```
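A quick sketch of the reworked validator's effect, assuming `RVC4Config` can be instantiated directly with its defaults:

```python
from modelconverter.utils.config import RVC4Config
from modelconverter.utils.types import QuantizationMode

# _validate_fp16 runs after construction: FP16 mode disables calibration
# and makes sure the qcs8550 platform is included in htp_socs.
cfg = RVC4Config(quantization_mode=QuantizationMode.FP16_STD)
assert cfg.disable_calibration is True
assert "qcs8550" in cfg.htp_socs

# Every other mode returns early and leaves both fields untouched.
assert RVC4Config().disable_calibration is False
```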
`modelconverter/utils/nn_archive.py` (30 additions, 19 deletions)

```diff
@@ -20,7 +20,12 @@
 from modelconverter.utils.constants import MISC_DIR
 from modelconverter.utils.layout import guess_new_layout, make_default_layout
 from modelconverter.utils.metadata import Metadata, get_metadata
-from modelconverter.utils.types import DataType, Encoding, Target
+from modelconverter.utils.types import (
+    DataType,
+    Encoding,
+    QuantizationMode,
+    Target,
+)
 
 
 def get_archive_input(cfg: NNArchiveConfig, name: str) -> NNArchiveInput:
@@ -231,21 +236,27 @@ def modelconverter_config_to_nn(
     target_cfg = cfg.get_target_config(target)
 
     # TODO: This might be more complicated for Hailo
 
-    onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
-    prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
-    fb16 = any(
-        a == "--float_bitwidth" and str(b) == "16"
-        for a, b in pairwise(onnx_args)
-    ) or any(
-        isinstance(x, str)
-        and x.startswith("--float_bitwidth=")
-        and x.split("=", 1)[1] == "16"
-        for x in onnx_args
-    )
-    compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
-        fb16 and "--use_float_io" in prep_args
-    )
+    quantization_mode = getattr(target_cfg, "quantization_mode", None)
+    if (
+        quantization_mode is None
+        or quantization_mode == QuantizationMode.CUSTOM
+    ):
+        onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
+        prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
+        fb16 = any(
+            a == "--float_bitwidth" and str(b) == "16"
+            for a, b in pairwise(onnx_args)
+        ) or any(
+            isinstance(x, str)
+            and x.startswith("--float_bitwidth=")
+            and x.split("=", 1)[1] == "16"
+            for x in onnx_args
+        )
+        compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
+            fb16 and "--use_float_io" in prep_args
+        )
+    else:
+        compress_to_fp16 = quantization_mode == QuantizationMode.FP16_STD
     disable_calibration = target_cfg.disable_calibration
 
     match target, compress_to_fp16, disable_calibration:
@@ -290,10 +301,10 @@ def modelconverter_config_to_nn(
             layout = make_default_layout(new_shape)
             dai_type = inp.encoding.to.value
             if inp.data_type == DataType.FLOAT16:
-                type = "F16F16F16"
+                channel_format = "F16F16F16"
             else:
-                type = "888"
-            dai_type += type
+                channel_format = "888"
+            dai_type += channel_format
             dai_type += "i" if layout == "NHWC" else "p"
 
             dtype = _get_io_dtype(
```
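To make the `dai_type` construction in the last hunk concrete, a small sketch with hypothetical values (in the real code these come from the input's encoding, data type, and layout):

```python
encoding = "RGB"   # stands in for inp.encoding.to.value
is_fp16 = False    # stands in for inp.data_type == DataType.FLOAT16
layout = "NHWC"

channel_format = "F16F16F16" if is_fp16 else "888"
dai_type = encoding + channel_format + ("i" if layout == "NHWC" else "p")
print(dai_type)  # RGB888i
```

Renaming the local variable from `type` to `channel_format` also stops it from shadowing Python's built-in `type`.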
`modelconverter/utils/types.py` (8 additions)

```diff
@@ -229,6 +229,14 @@ class Target(Enum):
     RVC4 = "rvc4"
 
 
+class QuantizationMode(Enum):
+    INT8_STD = "INT8_STANDARD"
+    INT8_ACC = "INT8_ACCURACY_FOCUSED"
+    INT8_16_MIX = "INT8_INT16_MIXED"
+    FP16_STD = "FP16_STANDARD"
+    CUSTOM = "CUSTOM"
+
+
 class InputFileType(Enum):
     ONNX = "ONNX"
     IR = "IR"
```
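Because the enum values are the user-facing names documented in the README, config and CLI values can be parsed straight into members; for example:

```python
from modelconverter.utils.types import QuantizationMode

mode = QuantizationMode("INT8_INT16_MIXED")  # value as written in a config
assert mode is QuantizationMode.INT8_16_MIX
assert mode.value == "INT8_INT16_MIXED"
```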
`shared_with_container/configs/defaults.yaml` (6 additions, 7 deletions)

```diff
@@ -198,6 +198,9 @@ stages:
     # List of additional arguments to pass to SNPE snpe-dlc-graph-prepare.
     snpe_dlc_graph_prepare_args: []
 
+    # Disables calibration/quantization.
+    disable_calibration: false
+
     # Whether to include the raw images in the intermediate outputs.
     # Warning: the raw images can get very large.
     keep_raw_images: False
@@ -213,12 +216,8 @@
     # List of platforms to pre-compute the DLC graph for.
     htp_socs: ["sm8550"]
 
-    # Configures conversion to float16 precision. This will
-    # disable the calibration, add the `--float_bitwidth 16`
-    # flag to `snpe-onnx-to-dlc`, the `--use_float_io` flag
-    # to `snpe-dlc-graph-prepare`, and the `qcs8550` platform
-    # to the `htp_socs` list.
-    compress_to_fp16: False
-
     # Optimization level for the DLC graph preparation. The available levels are: 1, 2, and 3. Higher optimization levels incur longer offline prepare time but yield a more optimal graph and hence faster execution time for most graphs.
     optimization_level: 2
 
+    # Pre-defined quantization mode for the RVC4 exporter. Every mode except CUSTOM overrides any user-provided SNPE arguments passed via `snpe_onnx_to_dlc_args`, `snpe_dlc_quant_args`, and `snpe_dlc_graph_prepare_args`. The available modes are: INT8_STANDARD, INT8_ACCURACY_FOCUSED, INT8_INT16_MIXED, FP16_STANDARD, and CUSTOM.
+    quantization_mode: INT8_STANDARD
```
`tests/test_utils/test_config.py` (2 additions, 1 deletion)

```diff
@@ -25,6 +25,7 @@
     Encoding,
     InputFileType,
     PotDevice,
+    QuantizationMode,
     ResizeMethod,
     Target,
 )
@@ -65,7 +66,7 @@
         "disable_calibration": False,
         "use_per_channel_quantization": True,
         "use_per_row_quantization": False,
-        "compress_to_fp16": False,
+        "quantization_mode": QuantizationMode.INT8_STD,
         "optimization_level": 2,
     },
     "hailo": {
```