diff --git a/csrc/device_lower/analysis/device_version.cpp b/csrc/device_lower/analysis/device_version.cpp
index 98b4a7300d0..3336040031d 100644
--- a/csrc/device_lower/analysis/device_version.cpp
+++ b/csrc/device_lower/analysis/device_version.cpp
@@ -5,6 +5,8 @@
  * SPDX-License-Identifier: BSD-3-Clause
  */
 // clang-format on
+#include <cuda.h>
+
 #include
 #include
 #include
@@ -19,9 +21,25 @@ void MinimumDeviceVersion::dispatch(Val* val) {
   }
   if (val->dtype() == DataType::Float8_e4m3fn ||
       val->dtype() == DataType::Float8_e5m2) {
+// See release note
+// https://docs.nvidia.com/cuda/archive/12.1.0/parallel-thread-execution/index.html#ptx-isa-version-8-1
+#if (CUDA_VERSION >= 12010)
     ensureVersion(
-        {9, 0},
+        {8, 9},
+        "Fusion contains Float8_xxx values which was introduced in Ada (8.9)");
+// See release note
+// https://docs.nvidia.com/cuda/archive/11.8.0/parallel-thread-execution/index.html#ptx-isa-version-7-8
+#elif (CUDA_VERSION >= 11080)
+    // Toolkits older than 12.1 keep the Hopper (9.0) minimum named in the message.
+    ensureVersion(
+        {9, 0},
         "Fusion contains Float8_xxx values which was introduced in Hopper (9.0)");
+#else
+    // NVF_ERROR takes the condition first; pass false so this always fails.
+    NVF_ERROR(
+        false,
+        "Fusion contains Float8_xxx values which was not supported in given CUDA version");
+#endif // (CUDA_VERSION >= 12010)
   }
   IterVisitor::dispatch(val);
 }
diff --git a/tests/cpp/test_gpu1.cpp b/tests/cpp/test_gpu1.cpp
index 05faa1d5b60..093265b2dcf 100644
--- a/tests/cpp/test_gpu1.cpp
+++ b/tests/cpp/test_gpu1.cpp
@@ -2712,12 +2712,16 @@ TEST_F(NVFuserTest, FusionFp8CastOps_CUDA) {
   std::vector inputs = {input1};
   KernelExecutor ke;
-
+#if (CUDA_VERSION >= 12010)
+  if (!deviceMajorMinorCheck(8, 9)) {
+#elif (CUDA_VERSION >= 11080)
   if (!deviceMajorMinorCheck(9)) {
+#else
+  if (true) {
+#endif
     ASSERT_THAT(
         [&]() { ke.compile(&fusion, inputs); },
         testing::ThrowsMessage(testing::HasSubstr(
-            "Reason: Fusion contains Float8_xxx values which was introduced in Hopper (9.0)")));
-    GTEST_SKIP() << "skipping tests on pre-HOPPER GPUs";
+            "Fusion contains Float8_xxx values")));
   } else {
     ke.compile(&fusion, inputs);
     auto outputs = ke.run(inputs);