bugfix: Choose sm90 kernels only for Hopper GPUs. (#719)

bobboli · web-flow · commit 06309c4e6204 · 2025-01-06T11:38:18.000-08:00
Some kernels use instructions specific to Hopper, which might not be
compatible with future GPUs. Fall back to non-Hopper kernels for all
other GPUs.
diff --git a/flashinfer/utils.py b/flashinfer/utils.py
@@ -262,7 +262,7 @@ def get_cuda_stream(device: torch.device) -> int:
 
 def determine_gemm_backend(device: torch.device) -> str:
     major, _ = get_compute_capability(device)
-    if major >= 9 and torch.version.cuda >= "12.3":
+    if major == 9 and torch.version.cuda >= "12.3":
         return "sm90"
     else:
         return "sm80"
@@ -349,7 +349,7 @@ def determine_attention_backend(
     major, _ = get_compute_capability(device)
 
     if (
-        major >= 9
+        major == 9
         and torch.version.cuda >= "12.3"
         and is_fa3_backend_supported(
             pos_encoding_mode,