Commit 6e459ed

Add handling for Qwen3VLMoe on older transformers versions (#2040)
SUMMARY: #1981 added Qwen3VLMoe with associated tests; however, this model isn't available on all transformers versions that we support. Therefore (similar to #2030), this PR ensures we don't import or test the model when using a transformers version that doesn't support it.

TEST PLAN: Confirmed that this change fixes `import llmcompressor` when using the oldest supported transformers version, 4.54.0. Ran the test with an old transformers version (test gets skipped) and with a new transformers version (test passes).

Signed-off-by: Fynn Schmitt-Ulms <[email protected]>
1 parent 148efda commit 6e459ed
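
The fix works because `typing.TYPE_CHECKING` is `False` at runtime, so imports guarded by it never execute when `llmcompressor` is loaded, while static type checkers treat the constant as `True` and still resolve the annotations. A minimal sketch of the pattern (the helper `hidden_size_of` is illustrative, not code from this PR):

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Never executed at runtime, so a transformers version without
    # Qwen3VLMoe cannot break this module's import. Type checkers
    # assume TYPE_CHECKING is True and follow the import normally.
    from transformers import Qwen3VLMoeConfig


def hidden_size_of(config: "Qwen3VLMoeConfig") -> int:
    # The quoted annotation is a forward reference stored as a string;
    # it is never evaluated when the function is defined or called.
    return config.get_text_config().hidden_size
```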

File tree

2 files changed: +23 −13 lines changed


src/llmcompressor/modeling/qwen3_vl_moe.py

Lines changed: 13 additions & 9 deletions
```diff
@@ -1,12 +1,16 @@
+from typing import TYPE_CHECKING
+
 import torch
-from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
-from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
-    Qwen3VLMoeTextSparseMoeBlock as OriginalQwen3VLMoeTextSparseMoeBlock,
-)
 
 from llmcompressor.modeling.moe_context import MoECalibrationModule
 from llmcompressor.utils.dev import skip_weights_initialize
 
+if TYPE_CHECKING:
+    from transformers import Qwen3VLMoeConfig, Qwen3VLMoeTextConfig
+    from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
+        Qwen3VLMoeTextSparseMoeBlock,
+    )
+
 
 @MoECalibrationModule.register("Qwen3VLMoeTextSparseMoeBlock")
 class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):
@@ -19,12 +23,12 @@ class CalibrateQwen3VLMoeTextSparseMoeBlock(MoECalibrationModule):
 
     def __init__(
         self,
-        original: OriginalQwen3VLMoeTextSparseMoeBlock,
-        config: Qwen3VLMoeConfig,
+        original: "Qwen3VLMoeTextSparseMoeBlock",
+        config: "Qwen3VLMoeConfig",
         calibrate_all_experts: bool,
     ):
         super().__init__()
-        text_config: Qwen3VLMoeTextConfig = config.get_text_config()
+        text_config: "Qwen3VLMoeTextConfig" = config.get_text_config()
 
         self.hidden_size = text_config.hidden_size
         self.num_experts = text_config.num_experts
@@ -115,8 +119,8 @@ def __init__(self, config, original):
 
 
 def replace(
-    config: Qwen3VLMoeConfig,
-    original: OriginalQwen3VLMoeTextSparseMoeBlock,
+    config: "Qwen3VLMoeConfig",
+    original: "Qwen3VLMoeTextSparseMoeBlock",
     calibrate_all_experts: bool,
 ):
     return CalibrateQwen3VLMoeTextSparseMoeBlock(
```
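
With the transformers imports deferred behind `TYPE_CHECKING`, the module itself should now import on any supported transformers version. A hedged sketch of the kind of check described in the test plan, assuming only the module path and class name shown in the diff above:

```python
import importlib

# Should succeed even on transformers versions that lack Qwen3VLMoe,
# since the model imports above are never executed at runtime.
module = importlib.import_module("llmcompressor.modeling.qwen3_vl_moe")
assert hasattr(module, "CalibrateQwen3VLMoeTextSparseMoeBlock")
print("import ok")
```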

tests/llmcompressor/modeling/test_calib_qwen3_vl_moe.py

Lines changed: 10 additions & 4 deletions
```diff
@@ -1,13 +1,19 @@
+import pytest
 import torch
-from transformers import Qwen3VLMoeConfig
-from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
-    Qwen3VLMoeTextSparseMoeBlock,
-)
 
 from llmcompressor.modeling.qwen3_vl_moe import CalibrateQwen3VLMoeTextSparseMoeBlock
 from llmcompressor.utils.helpers import calibration_forward_context
 from tests.testing_utils import requires_gpu
 
+Qwen3VLMoeConfig = pytest.importorskip(
+    "transformers.models.qwen3_vl_moe.configuration_qwen3_vl_moe",
+    reason="Qwen3VLMoeConfig not available in this version of transformers",
+).Qwen3VLMoeConfig
+Qwen3VLMoeTextSparseMoeBlock = pytest.importorskip(
+    "transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe",
+    reason="Qwen3VLMoeTextSparseMoeBlock not available in this version of transformers",
+).Qwen3VLMoeTextSparseMoeBlock
+
 
 @requires_gpu
 def test_calib_qwen3_vl_moe_module():
```
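
`pytest.importorskip` imports the named module at collection time and skips the test file with the given reason if that import fails; the returned module object lets names be bound exactly as with a plain import, which is how this test is gated on transformers versions that ship Qwen3VLMoe. A self-contained sketch of the same pattern, using `numpy` as a stand-in dependency:

```python
import pytest

# If numpy is missing, every test in this file is skipped (not errored),
# with the reason shown in the pytest report.
np = pytest.importorskip("numpy", reason="numpy not installed")


def test_add():
    # Runs only when the guarded import above succeeded.
    assert np.add(2, 3) == 5
```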
