Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modelconverter/hub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ Parameters specific to the `RVC4` conversion.
| `snpe_dlc_graph_prepare_args` | `list[str] \| None` | The arguments to pass to the `snpe-dlc-graph-prepare` tool. |
| `use_per_channel_quantization` | `bool` | Whether to use per-channel quantization. Defaults to `True`. |
| `use_per_row_quantization` | `bool` | Whether to use per-row quantization. Defaults to `False`. |
| `quantization_mode` | `Literal["INT8_STANDARD", "INT8_ACCURACY_FOCUSED", "INT8_INT16_MIXED", "FP16_STANDARD", "CUSTOM"]` | The pre-defined quantization mode to use. Defaults to `INT8_STANDARD`. |
| `htp_socs` | `list[str] \| None` | The list of HTP SoCs to use. |

**Hailo Parameters**
Expand Down
15 changes: 14 additions & 1 deletion modelconverter/hub/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from luxonis_ml.typing import Kwargs, PathType

from modelconverter.utils.types import PotDevice, Target
from modelconverter.utils.types import PotDevice, QuantizationMode, Target

from .__main__ import convert as cli_convert

Expand Down Expand Up @@ -125,6 +125,14 @@ def RVC4(
snpe_dlc_graph_prepare_args: list[str] | None = None,
use_per_channel_quantization: bool = True,
use_per_row_quantization: bool = False,
quantization_mode: QuantizationMode
| Literal[
"INT8_STANDARD",
"INT8_ACCURACY_FOCUSED",
"INT8_INT16_MIXED",
"FP16_STANDARD",
"CUSTOM",
] = QuantizationMode.INT8_STD,
htp_socs: list[
Literal[
"sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"
Expand All @@ -150,6 +158,8 @@ def RVC4(
Whether to use per-channel quantization.
use_per_row_quantization : bool, default False
Whether to use per-row quantization.
quantization_mode : QuantizationMode | Literal["INT8_STANDARD", "INT8_ACCURACY_FOCUSED", "INT8_INT16_MIXED", "FP16_STANDARD", "CUSTOM"], default QuantizationMode.INT8_STD
Pre-defined quantization modes for the model conversion.
htp_socs : list[str] | None, optional
List of HTP SoCs for the final DLC graph.
opts : dict[str, Any] | list[str] | None, optional
Expand All @@ -160,6 +170,8 @@ def RVC4(
online conversion.
"""
htp_socs = htp_socs or ["sm8550"]
if not isinstance(quantization_mode, QuantizationMode):
quantization_mode = QuantizationMode(quantization_mode)
return cli_convert(
Target.RVC4,
_combine_opts(
Expand All @@ -172,6 +184,7 @@ def RVC4(
"use_per_channel_quantization": use_per_channel_quantization
or [],
"use_per_row_quantization": use_per_row_quantization,
"quantization_mode": quantization_mode.value,
"htp_socs": htp_socs,
},
opts,
Expand Down
48 changes: 45 additions & 3 deletions modelconverter/packages/rvc4/exporter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import shutil
import subprocess
import time
Expand All @@ -22,6 +23,7 @@
DataType,
Encoding,
InputFileType,
QuantizationMode,
ResizeMethod,
Target,
)
Expand All @@ -37,7 +39,6 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
super().__init__(config=config, output_dir=output_dir)

rvc4_cfg = config.rvc4
self.compress_to_fp16 = rvc4_cfg.compress_to_fp16
self.snpe_onnx_to_dlc = rvc4_cfg.snpe_onnx_to_dlc_args
self.snpe_dlc_quant = rvc4_cfg.snpe_dlc_quant_args
self.snpe_dlc_graph_prepare = rvc4_cfg.snpe_dlc_graph_prepare_args
Expand All @@ -46,6 +47,14 @@ def __init__(self, config: SingleStageConfig, output_dir: Path):
)
self.use_per_row_quantization = rvc4_cfg.use_per_row_quantization
self.optimization_level = rvc4_cfg.optimization_level
self.quantization_mode = rvc4_cfg.quantization_mode
if self.quantization_mode != QuantizationMode.CUSTOM:
self.snpe_onnx_to_dlc = []
self.snpe_dlc_quant = []
self.snpe_dlc_graph_prepare = []
logger.warning(
f"Overriding user-provided SNPE arguments. Using pre-defined arguments for quantization mode {self.quantization_mode.value}."
)
self.keep_raw_images = rvc4_cfg.keep_raw_images
if "--htp_socs" in self.snpe_dlc_graph_prepare:
i = self.snpe_dlc_graph_prepare.index("--htp_socs")
Expand Down Expand Up @@ -114,7 +123,7 @@ def export(self) -> Path:
args, ["--optimization_level", str(self.optimization_level)]
)
self._add_args(args, ["--htp_socs", ",".join(self.htp_socs)])
if self.compress_to_fp16:
if self.quantization_mode == QuantizationMode.FP16_STD:
self._add_args(args, ["--use_float_io"])
self._subprocess_run(
["snpe-dlc-graph-prepare", *args], meta_name="graph_prepare"
Expand Down Expand Up @@ -157,6 +166,15 @@ def calibrate(self, dlc_path: Path) -> Path:
if self.use_per_row_quantization:
args.append("--use_per_row_quantization")

if self.quantization_mode == QuantizationMode.INT8_ACC:
self._add_args(args, ["--param_quantizer", "enhanced"])
self._add_args(args, ["--act_quantizer", "enhanced"])
elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
self._add_args(args, ["--param_quantizer", "enhanced"])
self._add_args(args, ["--act_quantizer", "enhanced"])
self._add_args(args, ["--act_bitwidth", "16"])
args.append("--override_params")

start_time = time.time()
self._subprocess_run(
["snpe-dlc-quant", *args], meta_name="quantization_cmd"
Expand Down Expand Up @@ -241,6 +259,21 @@ class Entry(NamedTuple):
f.write(entry_str + "\n")
return self.input_list_path

def generate_io_encodings(self) -> Path:
    """Write a quantization-overrides JSON file pinning all model inputs
    and outputs to 8-bit integer activation encodings.

    The file is written to ``self.intermediate_outputs_dir`` as
    ``io_encodings.json``. It is intended to be passed to
    ``snpe-onnx-to-dlc`` via ``--quantization_overrides`` (used by the
    ``INT8_INT16_MIXED`` quantization mode to keep I/O tensors at 8 bits).

    Returns
    -------
    Path
        Path to the generated ``io_encodings.json`` file.
    """
    encodings_dict: dict[str, dict[str, list[dict[str, object]]]] = {
        "activation_encodings": {},
        "param_encodings": {},
    }
    # Best-effort: warn but still emit the (possibly empty) overrides file
    # so the caller's command line stays valid.
    if not (self.inputs and self.outputs):
        logger.warning(
            "Cannot generate I/O encodings as inputs or outputs are not defined. The resulting DLC may not be compatible with DAI."
        )
    # Pin every known input and output tensor to int8 activations;
    # param_encodings is intentionally left empty.
    for name in [*self.inputs, *self.outputs]:
        encodings_dict["activation_encodings"][name] = [
            {"bitwidth": 8, "dtype": "int"}
        ]
    encodings_path = self.intermediate_outputs_dir / "io_encodings.json"
    with open(encodings_path, "w") as encodings_file:
        json.dump(encodings_dict, encodings_file, indent=4)
    return encodings_path

def onnx_to_dlc(self) -> Path:
logger.info("Exporting for RVC4")
args = self.snpe_onnx_to_dlc
Expand Down Expand Up @@ -293,8 +326,17 @@ def onnx_to_dlc(self) -> Path:
"Proceeding wihtout specifying layout."
)

if self.compress_to_fp16:
if self.quantization_mode == QuantizationMode.FP16_STD:
self._add_args(args, ["--float_bitwidth", "16"])
elif self.quantization_mode == QuantizationMode.INT8_16_MIX:
io_encodings_file = self.generate_io_encodings()
self._add_args(
args,
[
"--quantization_overrides",
f"{io_encodings_file}",
],
)

if self.is_tflite:
command = "snpe-tflite-to-dlc"
Expand Down
5 changes: 3 additions & 2 deletions modelconverter/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Encoding,
InputFileType,
PotDevice,
QuantizationMode,
ResizeMethod,
Target,
)
Expand Down Expand Up @@ -264,21 +265,21 @@ class RVC3Config(BlobBaseConfig):


class RVC4Config(TargetConfig):
compress_to_fp16: bool = False
snpe_onnx_to_dlc_args: list[str] = []
snpe_dlc_quant_args: list[str] = []
snpe_dlc_graph_prepare_args: list[str] = []
keep_raw_images: bool = False
use_per_channel_quantization: bool = True
use_per_row_quantization: bool = False
optimization_level: Literal[1, 2, 3] = 2
quantization_mode: QuantizationMode = QuantizationMode.INT8_STD
htp_socs: list[
Literal["sm8350", "sm8450", "sm8550", "sm8650", "qcs6490", "qcs8550"]
] = ["sm8550"]

@model_validator(mode="after")
def _validate_fp16(self) -> Self:
if not self.compress_to_fp16:
if self.quantization_mode != QuantizationMode.FP16_STD:
return self
self.disable_calibration = True
if "qcs8550" not in self.htp_socs:
Expand Down
49 changes: 30 additions & 19 deletions modelconverter/utils/nn_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@
from modelconverter.utils.constants import MISC_DIR
from modelconverter.utils.layout import guess_new_layout, make_default_layout
from modelconverter.utils.metadata import Metadata, get_metadata
from modelconverter.utils.types import DataType, Encoding, Target
from modelconverter.utils.types import (
DataType,
Encoding,
QuantizationMode,
Target,
)


def get_archive_input(cfg: NNArchiveConfig, name: str) -> NNArchiveInput:
Expand Down Expand Up @@ -231,21 +236,27 @@ def modelconverter_config_to_nn(
target_cfg = cfg.get_target_config(target)

# TODO: This might be more complicated for Hailo

onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
fb16 = any(
a == "--float_bitwidth" and str(b) == "16"
for a, b in pairwise(onnx_args)
) or any(
isinstance(x, str)
and x.startswith("--float_bitwidth=")
and x.split("=", 1)[1] == "16"
for x in onnx_args
)
compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
fb16 and "--use_float_io" in prep_args
)
quantization_mode = getattr(target_cfg, "quantization_mode", None)
if (
quantization_mode is None
or quantization_mode == QuantizationMode.CUSTOM
):
onnx_args = getattr(target_cfg, "snpe_onnx_to_dlc_args", [])
prep_args = getattr(target_cfg, "snpe_dlc_graph_prepare_args", [])
fb16 = any(
a == "--float_bitwidth" and str(b) == "16"
for a, b in pairwise(onnx_args)
) or any(
isinstance(x, str)
and x.startswith("--float_bitwidth=")
and x.split("=", 1)[1] == "16"
for x in onnx_args
)
compress_to_fp16 = getattr(target_cfg, "compress_to_fp16", False) or (
fb16 and "--use_float_io" in prep_args
)
else:
compress_to_fp16 = quantization_mode == QuantizationMode.FP16_STD
disable_calibration = target_cfg.disable_calibration

match target, compress_to_fp16, disable_calibration:
Expand Down Expand Up @@ -290,10 +301,10 @@ def modelconverter_config_to_nn(
layout = make_default_layout(new_shape)
dai_type = inp.encoding.to.value
if inp.data_type == DataType.FLOAT16:
type = "F16F16F16"
channel_format = "F16F16F16"
else:
type = "888"
dai_type += type
channel_format = "888"
dai_type += channel_format
dai_type += "i" if layout == "NHWC" else "p"

dtype = _get_io_dtype(
Expand Down
9 changes: 9 additions & 0 deletions modelconverter/utils/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,15 @@ class Target(Enum):
RVC4 = "rvc4"


class QuantizationMode(Enum):
    """Pre-defined quantization modes for RVC4 model conversion.

    The enum *values* are the user-facing strings accepted in configs and
    the hub API (e.g. ``"INT8_STANDARD"``); the member names are the
    short aliases used in code (e.g. ``QuantizationMode.INT8_STD``).
    """

    INT8_STD = "INT8_STANDARD"
    INT8_ACC = "INT8_ACCURACY_FOCUSED"
    INT8_16_MIX = "INT8_INT16_MIXED"
    # Added for future use; not yet listed in the public Literal options
    # or the README parameter table.
    INT8_FP16_MIX = "INT8_FP16_MIXED"
    FP16_STD = "FP16_STANDARD"
    CUSTOM = "CUSTOM"


class InputFileType(Enum):
ONNX = "ONNX"
IR = "IR"
Expand Down
10 changes: 3 additions & 7 deletions shared_with_container/configs/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -213,12 +213,8 @@ stages:
# List of platforms to pre-compute the DLC graph for.
htp_socs: ["sm8550"]

# Configures conversion to float16 precision. This will
# disable the calibration, add the `--float_bitwidth 16`
# flag to `snpe-onnx-to-dlc`, the `--use_float_io` flag
# to `snpe-dlc-graph-prepare`, and the `qcs8550` platform
# to the `htp_socs` list.
compress_to_fp16: False

# Optimization level for the DLC graph preparation. The available levels are: 1, 2, and 3. Higher optimization levels incur longer offline prepare times but yield a more optimal graph and hence faster execution times for most graphs.
optimization_level: 2

# Pre-defined quantization modes for the RVC4 exporter. Pre-defined modes (except CUSTOM) will override any user-provided SNPE arguments via `snpe_onnx_to_dlc_args`, `snpe_dlc_quant_args`, and `snpe_dlc_graph_prepare_args`. The available quantization modes are: INT8_STANDARD, INT8_ACCURACY_FOCUSED, INT8_INT16_MIXED, FP16_STANDARD, and CUSTOM.
quantization_mode: INT8_STANDARD
3 changes: 2 additions & 1 deletion tests/test_utils/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Encoding,
InputFileType,
PotDevice,
QuantizationMode,
ResizeMethod,
Target,
)
Expand Down Expand Up @@ -65,7 +66,7 @@
"disable_calibration": False,
"use_per_channel_quantization": True,
"use_per_row_quantization": False,
"compress_to_fp16": False,
"quantization_mode": QuantizationMode.INT8_STD,
"optimization_level": 2,
},
"hailo": {
Expand Down
Loading