How can I apply the quantizer after the pipeline has already been loaded?
# Quantization occurs at load time: quantization_config is handed straight to
# from_pretrained together with the other loading options.
#
# Pick the model location first: an explicit args.model_path wins; otherwise
# use the QWEN_IMAGE_DIR environment variable, falling back to
# "Qwen/Qwen-Image" when that variable is not set.
if args.model_path is not None:
    model_source = args.model_path
else:
    model_source = os.environ.get("QWEN_IMAGE_DIR", "Qwen/Qwen-Image")

pipe = QwenImagePipeline.from_pretrained(
    model_source,
    scheduler=scheduler,
    torch_dtype=torch.bfloat16,
    quantization_config=quantization_config,
)
# Desired order: load on CPU -> load and fuse LoRA -> quantize -> move to GPU