Skip to content

Commit a0ad778

Browse files
authored
Support diffusion model saving (#1519)
1 parent e14e7db commit a0ad778

File tree

4 files changed

+144
-13
lines changed

4 files changed

+144
-13
lines changed

auto_round/compressors/diffusion/compressor.py

Lines changed: 53 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import os
1516
from collections import defaultdict
1617
from copy import deepcopy
1718
from typing import Union
@@ -22,6 +23,7 @@
2223
from auto_round.compressors.base import BaseCompressor
2324
from auto_round.compressors.diffusion.dataset import get_diffusion_dataloader
2425
from auto_round.compressors.utils import block_forward
26+
from auto_round.formats import OutputFormat
2527
from auto_round.logger import logger
2628
from auto_round.schemes import QuantizationScheme
2729
from auto_round.utils import (
@@ -112,13 +114,7 @@ def __init__(
112114
device_map = 0
113115
self._set_device(device_map)
114116

115-
if isinstance(model, str):
116-
pipe, model = diffusion_load_model(model, platform=platform, device=self.device, model_dtype=model_dtype)
117-
elif isinstance(model, pipeline_utils.DiffusionPipeline):
118-
pipe = model
119-
model = pipe.transformer
120-
else:
121-
raise ValueError(f"Only support str or DiffusionPipeline class for model, but get {type(model)}")
117+
pipe, model = diffusion_load_model(model, platform=platform, device=self.device, model_dtype=model_dtype)
122118

123119
self.model = model
124120
self.pipe = pipe
@@ -373,6 +369,33 @@ def calib(self, nsamples, bs):
373369

374370
# torch.cuda.empty_cache()
375371

372+
def _get_save_folder_name(self, format: OutputFormat) -> str:
373+
"""Generates the save folder name based on the provided format string.
374+
375+
If there are multiple formats to handle, the function creates a subfolder
376+
named after the format string with special characters replaced. If there's
377+
only one format, it returns the original output directory directly.
378+
379+
Args:
380+
format_str (str): The format identifier (e.g., 'gguf:q2_k_s').
381+
382+
Returns:
383+
str: The path to the folder where results should be saved.
384+
"""
385+
# Replace special characters to make the folder name filesystem-safe
386+
sanitized_format = format.get_backend_name().replace(":", "-").replace("_", "-")
387+
388+
# Use a subfolder only if there are multiple formats
389+
if len(self.formats) > 1:
390+
return (
391+
os.path.join(self.orig_output_dir, sanitized_format, "transformer")
392+
if self.is_immediate_saving
393+
else os.path.join(self.orig_output_dir, sanitized_format, "transformer")
394+
)
395+
396+
# if use is_immediate_saving, we need to save model in self.orig_output_dir/transformer folder
397+
return os.path.join(self.orig_output_dir, "transformer") if self.is_immediate_saving else self.orig_output_dir
398+
376399
def save_quantized(self, output_dir=None, format="auto_round", inplace=True, **kwargs):
    """Save the quantized diffusion pipeline to ``output_dir`` in ``format``.

    The quantized transformer component (identified by matching
    ``config._name_or_path`` against ``self.model``) is saved through the
    base-class ``save_quantized`` so quantization metadata is written;
    every other pipeline component that supports ``save_pretrained`` is
    saved alongside it, and the pipeline-level config (``model_index.json``)
    is written at the root, so the directory is a complete, loadable
    diffusion pipeline.

    Args:
        output_dir (str | None): Target directory.  When ``None``, the base
            implementation is invoked directly with no per-component layout.
        format (str): Output format identifier (default ``"auto_round"``).
        inplace (bool): Whether to compress the model in place.
        **kwargs: Forwarded to the base-class ``save_quantized``.

    Returns:
        object: The compressed model object (``None`` if the quantized
        transformer was not found among the pipeline components).
    """
    if output_dir is None:
        # Nothing to lay out on disk; defer entirely to the base class.
        return super().save_quantized(output_dir, format=format, inplace=inplace, **kwargs)

    compressed_model = None
    for name in self.pipe.components:  # iterating the mapping yields component names
        val = getattr(self.pipe, name)
        # Avoid doubling the component name when output_dir already ends with it.
        sub_module_path = (
            os.path.join(output_dir, name) if os.path.basename(os.path.normpath(output_dir)) != name else output_dir
        )
        if (
            hasattr(val, "config")
            and hasattr(val.config, "_name_or_path")
            and val.config._name_or_path == self.model.config._name_or_path
        ):
            # This component is the quantized transformer: save it through the
            # base compressor.  Immediate saving already targets the final
            # directory, so no per-component subfolder is appended then.
            compressed_model = super().save_quantized(
                output_dir=sub_module_path if not self.is_immediate_saving else output_dir,
                format=format,
                inplace=inplace,
                **kwargs,
            )
        elif val is not None and hasattr(val, "save_pretrained"):
            val.save_pretrained(sub_module_path)
    # Persist the pipeline-level config (model_index.json) at the root.
    self.pipe.config.save_pretrained(output_dir)
    return compressed_model

auto_round/eval/evaluation.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,15 @@ def evaluate_diffusion_model(args, autoround=None, model=None, pipe=None):
100100

101101
import torch
102102

103-
from auto_round.utils import detect_device, get_model_dtype, logger
103+
from auto_round.utils import detect_device, get_model_dtype, logger, unsupported_meta_device
104104

105105
# Prepare inference pipeline
106106
if pipe is None:
107+
if model is not None and unsupported_meta_device(model):
108+
logger.error(
109+
"Quantized model is meta and diffusers doesn't support loading auto-round quantized model now. Exit."
110+
)
111+
exit(0)
107112
pipe = autoround.pipe
108113
pipe.to(model.dtype)
109114
pipe.transformer = model

auto_round/utils/model.py

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,8 @@ def diffusion_load_model(
527527
model_dtype: str = None,
528528
**kwargs,
529529
):
530+
from functools import partial
531+
530532
from auto_round.utils.common import LazyImport
531533
from auto_round.utils.device import get_device_and_parallelism
532534

@@ -543,12 +545,68 @@ def diffusion_load_model(
543545
torch_dtype = torch.bfloat16
544546

545547
pipelines = LazyImport("diffusers.pipelines")
548+
if isinstance(pretrained_model_name_or_path, str):
549+
if torch_dtype == "auto":
550+
torch_dtype = {}
551+
model_index = os.path.join(pretrained_model_name_or_path, "model_index.json")
552+
with open(model_index, "r", encoding="utf-8") as file:
553+
config = json.load(file)
554+
for k, v in config.items():
555+
component_folder = os.path.join(pretrained_model_name_or_path, k)
556+
if isinstance(v, list) and os.path.exists(os.path.join(component_folder, "config.json")):
557+
component_folder = os.path.join(pretrained_model_name_or_path, k)
558+
with open(os.path.join(component_folder, "config.json"), "r", encoding="utf-8") as file:
559+
component_config = json.load(file)
560+
torch_dtype[k] = component_config.get("torch_dtype", "auto")
561+
562+
pipe = pipelines.auto_pipeline.AutoPipelineForText2Image.from_pretrained(
563+
pretrained_model_name_or_path, torch_dtype=torch_dtype
564+
)
565+
pipe_config = pipe.load_config(pretrained_model_name_or_path)
566+
567+
elif isinstance(pretrained_model_name_or_path, pipelines.pipeline_utils.DiffusionPipeline):
568+
pipe = pretrained_model_name_or_path
569+
pipe_config = pipe.load_config(pipe.config["_name_or_path"])
570+
571+
else:
572+
raise ValueError(
573+
f"Only support str or DiffusionPipeline class for model, but get {type(pretrained_model_name_or_path)}"
574+
)
575+
576+
# add missing key
577+
for k, v in pipe_config.items():
578+
if k not in pipe.config:
579+
pipe.config[k] = v
546580

547-
pipe = pipelines.auto_pipeline.AutoPipelineForText2Image.from_pretrained(
548-
pretrained_model_name_or_path, torch_dtype=torch_dtype
549-
)
550581
pipe = _to_model_dtype(pipe, model_dtype)
551582
model = pipe.transformer
583+
584+
def config_save_pretrained(config, file_name, save_directory):
585+
if os.path.isfile(save_directory):
586+
raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
587+
os.makedirs(save_directory, exist_ok=True)
588+
output_config_file = os.path.join(save_directory, file_name)
589+
590+
config_dict = dict(config)
591+
if file_name == "config.json" and hasattr(model.config, "quantization_config"):
592+
config_dict["quantization_config"] = model.config.quantization_config
593+
594+
with open(output_config_file, "w", encoding="utf-8") as writer:
595+
writer.write(json.dumps(config_dict, indent=2, sort_keys=True) + "\n")
596+
597+
# meta model uses model.config.save_pretrained for config saving
598+
setattr(model.config, "save_pretrained", partial(config_save_pretrained, model.config, "config.json"))
599+
setattr(pipe.config, "save_pretrained", partial(config_save_pretrained, pipe.config, "model_index.json"))
600+
601+
def model_save_pretrained(model, save_directory, **kwargs):
602+
super(model.__class__, model).save_pretrained(save_directory, **kwargs)
603+
if hasattr(model.config, "quantization_config"):
604+
model.config["quantization_config"] = model.config.quantization_config
605+
with open(os.path.join(save_directory, "config.json"), "w", encoding="utf-8") as writer:
606+
writer.write(json.dumps(dict(model.config), indent=2, sort_keys=True) + "\n")
607+
608+
# non-meta model uses model.save_pretrained for model and config saving
609+
setattr(model, "save_pretrained", partial(model_save_pretrained, model))
552610
return pipe, model.to(device)
553611

554612

test/test_cpu/models/test_diffusion.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
import os
12
import shutil
23

34
import pytest
5+
import torch
46
from packaging import version
57

68
from auto_round import AutoRound
@@ -16,11 +18,32 @@ def setup_flux():
1618
from diffusers import AutoPipelineForText2Image
1719

1820
model_name = flux_name_or_path
19-
pipe = AutoPipelineForText2Image.from_pretrained(model_name)
21+
# use bf16 to reduce the saved model size
22+
pipe = AutoPipelineForText2Image.from_pretrained(model_name, torch_dtype=torch.bfloat16)
2023
output_dir = "./tmp/test_quantized_flux"
2124
return pipe, output_dir
2225

2326

27+
@pytest.mark.skipif(
    transformers_version >= version.parse("5.0.0"),
    reason="cannot import name 'MT5Tokenizer' from 'transformers', https://github.com/huggingface/diffusers/issues/13035",
)
def test_flux_saving(setup_flux):
    """Quantize a Flux pipeline with plain RTN and verify the saved layout."""
    pipe, output_dir = setup_flux
    # iters=0 with disable_opt_rtn selects the fastest (plain RTN) path,
    # which is enough to exercise the diffusion-model saving flow.
    autoround = AutoRound(
        pipe,
        tokenizer=None,
        scheme="W4A16",
        iters=0,
        num_inference_steps=2,
        disable_opt_rtn=True,
    )
    autoround.quantize_and_save(output_dir)
    # The pipeline index must sit at the root, and the quantized transformer
    # (with its quantization config) in the `transformer` subfolder.
    assert os.path.exists(os.path.join(output_dir, "model_index.json"))
    assert os.path.exists(os.path.join(output_dir, "transformer", "quantization_config.json"))
    # Clean up the quantized artifacts produced by this test.
    shutil.rmtree(output_dir, ignore_errors=True)
2447
@pytest.mark.skipif(
2548
transformers_version >= version.parse("5.0.0"),
2649
reason="cannot import name 'MT5Tokenizer' from 'transformers', https://github.com/huggingface/diffusers/issues/13035",

0 commit comments

Comments
 (0)