Commit ed96caf

modules/cuda: Update arch flags for versions through 13.0
This includes adding Blackwell support, as well as handling the deprecation of many older architectures in 12.9 and 13.0.
Parent: 43858a0
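For orientation, here is a minimal sketch (not Meson code) of the version gating this commit introduces in cuda.py: an architecture only stays on the "known" list for toolkits that still ship support for it. The helper names known_architectures() and _at_least() are illustrative stand-ins for the module's version_compare()-based checks.

# Illustrative sketch only: mirrors the gating added in cuda.py below.
# known_architectures() and _at_least() are hypothetical helpers, not Meson API.
def _at_least(version: str, minimum: str) -> bool:
    """Crude numeric comparison standing in for Meson's version_compare()."""
    def parse(v: str) -> tuple:
        return tuple(int(p) for p in v.split('.'))
    return parse(version) >= parse(minimum)

def known_architectures(cuda_version: str) -> list:
    archs = []
    # Fermi and Kepler support have been dropped since 12.0
    if not _at_least(cuda_version, '12.0'):
        archs.extend(['Fermi', 'Kepler'])
    # Everything older than Turing is dropped by 13.0
    if not _at_least(cuda_version, '13.0'):
        archs.append('Maxwell')
    # Blackwell is only known to 12.9 and newer toolkits
    if _at_least(cuda_version, '12.9'):
        archs.append('Blackwell')
    return archs

print(known_architectures('11.8'))  # ['Fermi', 'Kepler', 'Maxwell']
print(known_architectures('12.9'))  # ['Maxwell', 'Blackwell']
print(known_architectures('13.0'))  # ['Blackwell']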

2 files changed: +50, -27 lines
mesonbuild/modules/cuda.py

Lines changed: 49 additions & 26 deletions
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
-# Copyright 2017 The Meson development team
+# Copyright 2017-2025 The Meson development team
 
 from __future__ import annotations
 
@@ -215,42 +215,50 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected
         # except that a bug with cuda_arch_list="All" is worked around by
         # tracking both lower and upper limits on GPU architectures.
 
-        cuda_known_gpu_architectures = ['Fermi', 'Kepler', 'Maxwell'] # noqa: E221
+        cuda_known_gpu_architectures = [] # noqa: E221
         cuda_common_gpu_architectures = ['3.0', '3.5', '5.0'] # noqa: E221
         cuda_hi_limit_gpu_architecture = None # noqa: E221
         cuda_lo_limit_gpu_architecture = '2.0' # noqa: E221
         cuda_all_gpu_architectures = ['3.0', '3.2', '3.5', '5.0'] # noqa: E221
 
-        if version_compare(cuda_version, '<7.0'):
-            cuda_hi_limit_gpu_architecture = '5.2'
+        # Fermi and Kepler support have been dropped since 12.0
+        if version_compare(cuda_version, '<12.0'):
+            cuda_known_gpu_architectures.extend(['Fermi', 'Kepler'])
 
-        if version_compare(cuda_version, '>=7.0'):
-            cuda_known_gpu_architectures += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra'] # noqa: E221
-            cuda_common_gpu_architectures += ['5.2'] # noqa: E221
+        # Everything older than Turing is dropped by 13.0
+        if version_compare(cuda_version, '<13.0'):
+            cuda_known_gpu_architectures.append('Maxwell')
 
-        if version_compare(cuda_version, '<8.0'):
-            cuda_common_gpu_architectures += ['5.2+PTX'] # noqa: E221
-            cuda_hi_limit_gpu_architecture = '6.0' # noqa: E221
+        if version_compare(cuda_version, '<7.0'):
+            cuda_hi_limit_gpu_architecture = '5.2'
 
-        if version_compare(cuda_version, '>=8.0'):
-            cuda_known_gpu_architectures += ['Pascal', 'Pascal+Tegra'] # noqa: E221
-            cuda_common_gpu_architectures += ['6.0', '6.1'] # noqa: E221
-            cuda_all_gpu_architectures += ['6.0', '6.1', '6.2'] # noqa: E221
+        if version_compare(cuda_version, '>=7.0'):
+            cuda_known_gpu_architectures += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra'] # noqa: E221
+            cuda_common_gpu_architectures += ['5.2'] # noqa: E221
 
-        if version_compare(cuda_version, '<9.0'):
-            cuda_common_gpu_architectures += ['6.1+PTX'] # noqa: E221
-            cuda_hi_limit_gpu_architecture = '7.0' # noqa: E221
+        if version_compare(cuda_version, '<8.0'):
+            cuda_common_gpu_architectures += ['5.2+PTX'] # noqa: E221
+            cuda_hi_limit_gpu_architecture = '6.0' # noqa: E221
 
-        if version_compare(cuda_version, '>=9.0'):
-            cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221
-            cuda_common_gpu_architectures += ['7.0'] # noqa: E221
-            cuda_all_gpu_architectures += ['7.0', '7.2'] # noqa: E221
-            # https://docs.nvidia.com/cuda/archive/9.0/cuda-toolkit-release-notes/index.html#unsupported-features
-            cuda_lo_limit_gpu_architecture = '3.0' # noqa: E221
+        if version_compare(cuda_version, '>=8.0'):
+            cuda_known_gpu_architectures += ['Pascal', 'Pascal+Tegra'] # noqa: E221
+            cuda_common_gpu_architectures += ['6.0', '6.1'] # noqa: E221
+            cuda_all_gpu_architectures += ['6.0', '6.1', '6.2'] # noqa: E221
 
-        if version_compare(cuda_version, '<10.0'):
-            cuda_common_gpu_architectures += ['7.2+PTX'] # noqa: E221
-            cuda_hi_limit_gpu_architecture = '8.0' # noqa: E221
+        if version_compare(cuda_version, '<9.0'):
+            cuda_common_gpu_architectures += ['6.1+PTX'] # noqa: E221
+            cuda_hi_limit_gpu_architecture = '7.0' # noqa: E221
+
+        if version_compare(cuda_version, '>=9.0'):
+            cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221
+            cuda_common_gpu_architectures += ['7.0'] # noqa: E221
+            cuda_all_gpu_architectures += ['7.0', '7.2'] # noqa: E221
+            # https://docs.nvidia.com/cuda/archive/9.0/cuda-toolkit-release-notes/index.html#unsupported-features
+            cuda_lo_limit_gpu_architecture = '3.0' # noqa: E221
+
+        if version_compare(cuda_version, '<10.0'):
+            cuda_common_gpu_architectures += ['7.2+PTX'] # noqa: E221
+            cuda_hi_limit_gpu_architecture = '8.0' # noqa: E221
 
         if version_compare(cuda_version, '>=10.0'):
             cuda_known_gpu_architectures += ['Turing'] # noqa: E221
@@ -303,6 +311,20 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected
             if version_compare(cuda_version, '<13'):
                 cuda_hi_limit_gpu_architecture = '10.0' # noqa: E221
 
+        if version_compare(cuda_version, '>=12.9'):
+            cuda_known_gpu_architectures.append('Blackwell')
+            cuda_common_gpu_architectures.extend(['10.0', '10.3', '12.0', '12.1'])
+            cuda_all_gpu_architectures.extend(['10.0', '10.3', '12.0', '12.1'])
+            cuda_lo_limit_gpu_architecture = '7.5'
+
+            # 12.9 and 13.0 both export this limit
+            if version_compare(cuda_version, '<14'):
+                cuda_hi_limit_gpu_architecture = '12.1'
+
+        if version_compare(cuda_version, '>=13.0'):
+            cuda_common_gpu_architectures.append('11.0')
+            cuda_all_gpu_architectures.append('11.0')
+
         if not cuda_arch_list:
             cuda_arch_list = 'Auto'
 
@@ -355,6 +377,7 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected
                 'Orin': (['8.7'], []),
                 'Lovelace': (['8.9'], ['8.9']),
                 'Hopper': (['9.0'], ['9.0']),
+                'Blackwell': (['10.0'], ['10.0']),
             }.get(arch_name, (None, None))
 
             if arch_bin is None:
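As a rough usage illustration of the new table entry (a sketch, not the module's public API: resolve_arch() and ARCH_TABLE are hypothetical names, and only rows visible in the diff above are reproduced), a named architecture such as 'Blackwell' resolves to its real-architecture and PTX compute capabilities, from which the nvcc arch flags are then built.

# Sketch only: an excerpt of the lookup shown above, with the new Blackwell row.
ARCH_TABLE = {
    'Orin':      (['8.7'],  []),
    'Lovelace':  (['8.9'],  ['8.9']),
    'Hopper':    (['9.0'],  ['9.0']),
    'Blackwell': (['10.0'], ['10.0']),  # added by this commit
}

def resolve_arch(arch_name: str):
    # Same dict-lookup-with-default pattern as the module: unknown names
    # fall through to (None, None) and are reported as an error further down.
    arch_bin, arch_ptx = ARCH_TABLE.get(arch_name, (None, None))
    if arch_bin is None:
        raise ValueError(f'Unknown CUDA architecture: {arch_name}')
    return arch_bin, arch_ptx

print(resolve_arch('Blackwell'))  # (['10.0'], ['10.0'])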

run_project_tests.py

Lines changed: 1 addition & 1 deletion
@@ -1136,7 +1136,7 @@ def __init__(self, category: str, subdir: str, skip: bool = False, stdout_mandat
         TestCategory('fortran', 'fortran', skip_fortran or backend != Backend.ninja),
         TestCategory('swift', 'swift', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('swiftc')),
         # CUDA tests on Windows: use Ninja backend: python run_project_tests.py --only cuda --backend ninja
-        TestCategory('cuda', 'cuda', backend not in (Backend.ninja, Backend.xcode) or not shutil.which('nvcc')),
+        TestCategory('cuda', 'cuda', backend not in (Backend.ninja, Backend.xcode)),
         TestCategory('python3', 'python3', backend is not Backend.ninja or 'python3' not in sys.executable),
         TestCategory('python', 'python'),
         TestCategory('fpga', 'fpga', shutil.which('yosys') is None),
