enable test for ipex

Devjiu · Devjiu · commit a1826d6dfab3 · 2025-06-03T12:08:26.000Z
diff --git a/bitsandbytes/backends/triton/ops.py b/bitsandbytes/backends/triton/ops.py
@@ -2,20 +2,14 @@
 
 import torch
 
+from . import triton_kernels
+
 # currently codes unused, kept for reference
 # Should be the same for quant/dequant
 # from bitsandbytes.functional import get_4bit_type
 # _FP4_QUANT_TABLE = get_4bit_type("fp4", device="xpu")
 # _NF4_QUANT_TABLE = get_4bit_type("nf4", device="xpu")
 
-try:
-    from . import triton_kernels
-
-    triton_available = True
-except ImportError as e:
-    print("Import error:", e)
-    triton_available = False
-
 
 def quantize_blockwise(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)
diff --git a/bitsandbytes/backends/utils.py b/bitsandbytes/backends/utils.py
@@ -10,6 +10,15 @@
     ipex_cpu = None
     ipex_xpu = None
 
+try:
+    import triton  # noqa: F401
+    import triton.language as tl  # noqa: F401
+
+    triton_available = True
+except ImportError as e:
+    triton_available = False
+
+
 _NF4_QUANT_TABLE = torch.tensor(
     [
         -1.0,
diff --git a/bitsandbytes/backends/xpu/ops.py b/bitsandbytes/backends/xpu/ops.py
@@ -4,10 +4,10 @@
 import torch
 
 from ..._ops import register_kernel
-from ..utils import ipex_xpu
+from ..utils import ipex_xpu, triton_available
 
-# With default torch, error:
-#  NotImplementedError: The operator 'aten::_int_mm' for XPU
+# torch provides _int_mm starting from version 2.7,
+# but it currently has no XPU implementation.
 if ipex_xpu and torch.__version__ >= (2, 7):
 
     @register_kernel("bitsandbytes::int8_linear_matmul", "xpu")
@@ -18,6 +18,7 @@ def _(A: torch.Tensor, B: torch.Tensor):
         ).reshape(*A.shape[:-1], B.shape[0])
 
 
+# IPEX should be faster on XPU, so check whether it is available first.
 if ipex_xpu:
 
     @register_kernel("bitsandbytes::dequantize_nf4_ipex", "xpu")
@@ -52,23 +53,15 @@ def _(
             raise ValueError(f"Blockwise quantization only supports 16/32-bit floats, but got {out.dtype}")
 
         return out.reshape(shape)
-else:
-    # IPEX should be faster for xpu, so at first checking if it is available.
-    try:
-        from ..triton import ops as triton_ops
-
-        triton_available = True
-    except ImportError as e:
-        print("Import error:", e)
-        triton_available = False
+elif triton_available:
+    from ..triton import ops as triton_ops
 
-    if triton_available:
-        register_kernel("bitsandbytes::quantize_blockwise", "xpu")(triton_ops.quantize_blockwise)
-        register_kernel("bitsandbytes::dequantize_blockwise.out", "xpu")(triton_ops.dequantize_blockwise_inplace)
-        register_kernel("bitsandbytes::dequantize_blockwise", "xpu")(triton_ops.dequantize_blockwise)
-        register_kernel("bitsandbytes::quantize_4bit", "xpu")(triton_ops.quantize_4bit)
-        register_kernel("bitsandbytes::dequantize_4bit.out", "xpu")(triton_ops.dequantize_4bit_inplace)
-        register_kernel("bitsandbytes::dequantize_4bit", "xpu")(triton_ops.dequantize_4bit)
-        register_kernel("bitsandbytes::gemv_4bit", "xpu")(triton_ops.gemv_4bit)
-    else:
-        warnings.warn("XPU available, but trtion package is missing.")
+    register_kernel("bitsandbytes::quantize_blockwise", "xpu")(triton_ops.quantize_blockwise)
+    register_kernel("bitsandbytes::dequantize_blockwise.out", "xpu")(triton_ops.dequantize_blockwise_inplace)
+    register_kernel("bitsandbytes::dequantize_blockwise", "xpu")(triton_ops.dequantize_blockwise)
+    register_kernel("bitsandbytes::quantize_4bit", "xpu")(triton_ops.quantize_4bit)
+    register_kernel("bitsandbytes::dequantize_4bit.out", "xpu")(triton_ops.dequantize_4bit_inplace)
+    register_kernel("bitsandbytes::dequantize_4bit", "xpu")(triton_ops.dequantize_4bit)
+    register_kernel("bitsandbytes::gemv_4bit", "xpu")(triton_ops.gemv_4bit)
+else:
+    warnings.warn("XPU available but no ipex or triton packages found.")
diff --git a/tests/test_modules.py b/tests/test_modules.py
@@ -5,6 +5,8 @@
 from torch import nn
 
 import bitsandbytes as bnb
+from bitsandbytes.backends.utils import triton_available
+from bitsandbytes.functional import ipex_xpu
 from tests.helpers import get_available_devices, id_formatter
 
 
@@ -287,8 +289,8 @@ def test_linear_kbit_fp32_bias(device, module):
 def test_kbit_backprop(device, module):
     if device == "cpu":
         pytest.xfail("Test is not yet supported on CPU")
-    if device == "xpu":
-        pytest.xfail("Missing int8_double_quant implementation XPU")
+    if device == "xpu" and module == bnb.nn.Linear8bitLt and not ipex_xpu and triton_available:
+        pytest.xfail("Missing int8_double_quant implementation in Triton for XPU")
 
     b = 16
     dim1 = 36