|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | | -# Copyright 2017 The Meson development team |
| 2 | +# Copyright 2017-2025 The Meson development team |
3 | 3 |
|
4 | 4 | from __future__ import annotations |
5 | 5 |
|
@@ -215,42 +215,50 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected |
215 | 215 | # except that a bug with cuda_arch_list="All" is worked around by |
216 | 216 | # tracking both lower and upper limits on GPU architectures. |
217 | 217 |
|
218 | | - cuda_known_gpu_architectures = ['Fermi', 'Kepler', 'Maxwell'] # noqa: E221 |
| 218 | + cuda_known_gpu_architectures = [] # noqa: E221 |
219 | 219 | cuda_common_gpu_architectures = ['3.0', '3.5', '5.0'] # noqa: E221 |
220 | 220 | cuda_hi_limit_gpu_architecture = None # noqa: E221 |
221 | 221 | cuda_lo_limit_gpu_architecture = '2.0' # noqa: E221 |
222 | 222 | cuda_all_gpu_architectures = ['3.0', '3.2', '3.5', '5.0'] # noqa: E221 |
223 | 223 |
|
224 | | - if version_compare(cuda_version, '<7.0'): |
225 | | - cuda_hi_limit_gpu_architecture = '5.2' |
| 224 | + # Support for Fermi and Kepler was dropped in 12.0 |
| 225 | + if version_compare(cuda_version, '<12.0'): |
| 226 | + cuda_known_gpu_architectures.extend(['Fermi', 'Kepler']) |
226 | 227 |
|
227 | | - if version_compare(cuda_version, '>=7.0'): |
228 | | - cuda_known_gpu_architectures += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra'] # noqa: E221 |
229 | | - cuda_common_gpu_architectures += ['5.2'] # noqa: E221 |
| 228 | + # Everything older than Turing is dropped as of 13.0 |
| 229 | + if version_compare(cuda_version, '<13.0'): |
| 230 | + cuda_known_gpu_architectures.append('Maxwell') |
230 | 231 |
|
231 | | - if version_compare(cuda_version, '<8.0'): |
232 | | - cuda_common_gpu_architectures += ['5.2+PTX'] # noqa: E221 |
233 | | - cuda_hi_limit_gpu_architecture = '6.0' # noqa: E221 |
| 232 | + if version_compare(cuda_version, '<7.0'): |
| 233 | + cuda_hi_limit_gpu_architecture = '5.2' |
234 | 234 |
|
235 | | - if version_compare(cuda_version, '>=8.0'): |
236 | | - cuda_known_gpu_architectures += ['Pascal', 'Pascal+Tegra'] # noqa: E221 |
237 | | - cuda_common_gpu_architectures += ['6.0', '6.1'] # noqa: E221 |
238 | | - cuda_all_gpu_architectures += ['6.0', '6.1', '6.2'] # noqa: E221 |
| 235 | + if version_compare(cuda_version, '>=7.0'): |
| 236 | + cuda_known_gpu_architectures += ['Kepler+Tegra', 'Kepler+Tesla', 'Maxwell+Tegra'] # noqa: E221 |
| 237 | + cuda_common_gpu_architectures += ['5.2'] # noqa: E221 |
239 | 238 |
|
240 | | - if version_compare(cuda_version, '<9.0'): |
241 | | - cuda_common_gpu_architectures += ['6.1+PTX'] # noqa: E221 |
242 | | - cuda_hi_limit_gpu_architecture = '7.0' # noqa: E221 |
| 239 | + if version_compare(cuda_version, '<8.0'): |
| 240 | + cuda_common_gpu_architectures += ['5.2+PTX'] # noqa: E221 |
| 241 | + cuda_hi_limit_gpu_architecture = '6.0' # noqa: E221 |
243 | 242 |
|
244 | | - if version_compare(cuda_version, '>=9.0'): |
245 | | - cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221 |
246 | | - cuda_common_gpu_architectures += ['7.0'] # noqa: E221 |
247 | | - cuda_all_gpu_architectures += ['7.0', '7.2'] # noqa: E221 |
248 | | - # https://docs.nvidia.com/cuda/archive/9.0/cuda-toolkit-release-notes/index.html#unsupported-features |
249 | | - cuda_lo_limit_gpu_architecture = '3.0' # noqa: E221 |
| 243 | + if version_compare(cuda_version, '>=8.0'): |
| 244 | + cuda_known_gpu_architectures += ['Pascal', 'Pascal+Tegra'] # noqa: E221 |
| 245 | + cuda_common_gpu_architectures += ['6.0', '6.1'] # noqa: E221 |
| 246 | + cuda_all_gpu_architectures += ['6.0', '6.1', '6.2'] # noqa: E221 |
250 | 247 |
|
251 | | - if version_compare(cuda_version, '<10.0'): |
252 | | - cuda_common_gpu_architectures += ['7.2+PTX'] # noqa: E221 |
253 | | - cuda_hi_limit_gpu_architecture = '8.0' # noqa: E221 |
| 248 | + if version_compare(cuda_version, '<9.0'): |
| 249 | + cuda_common_gpu_architectures += ['6.1+PTX'] # noqa: E221 |
| 250 | + cuda_hi_limit_gpu_architecture = '7.0' # noqa: E221 |
| 251 | + |
| 252 | + if version_compare(cuda_version, '>=9.0'): |
| 253 | + cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221 |
| 254 | + cuda_common_gpu_architectures += ['7.0'] # noqa: E221 |
| 255 | + cuda_all_gpu_architectures += ['7.0', '7.2'] # noqa: E221 |
| 256 | + # https://docs.nvidia.com/cuda/archive/9.0/cuda-toolkit-release-notes/index.html#unsupported-features |
| 257 | + cuda_lo_limit_gpu_architecture = '3.0' # noqa: E221 |
| 258 | + |
| 259 | + if version_compare(cuda_version, '<10.0'): |
| 260 | + cuda_common_gpu_architectures += ['7.2+PTX'] # noqa: E221 |
| 261 | + cuda_hi_limit_gpu_architecture = '8.0' # noqa: E221 |
254 | 262 |
|
255 | 263 | if version_compare(cuda_version, '>=10.0'): |
256 | 264 | cuda_known_gpu_architectures += ['Turing'] # noqa: E221 |
@@ -303,6 +311,20 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected |
303 | 311 | if version_compare(cuda_version, '<13'): |
304 | 312 | cuda_hi_limit_gpu_architecture = '10.0' # noqa: E221 |
305 | 313 |
|
| 314 | + if version_compare(cuda_version, '>=12.9'): |
| 315 | + cuda_known_gpu_architectures.append('Blackwell') |
| 316 | + cuda_common_gpu_architectures.extend(['10.0', '10.3', '12.0', '12.1']) |
| 317 | + cuda_all_gpu_architectures.extend(['10.0', '10.3', '12.0', '12.1']) |
| 318 | + cuda_lo_limit_gpu_architecture = '7.5' |
| 319 | + |
| 320 | + # 12.9 and 13.0 both export this limit |
| 321 | + if version_compare(cuda_version, '<14'): |
| 322 | + cuda_hi_limit_gpu_architecture = '12.1' |
| 323 | + |
| 324 | + if version_compare(cuda_version, '>=13.0'): |
| 325 | + cuda_common_gpu_architectures.append('11.0') |
| 326 | + cuda_all_gpu_architectures.append('11.0') |
| 327 | + |
306 | 328 | if not cuda_arch_list: |
307 | 329 | cuda_arch_list = 'Auto' |
308 | 330 |
|
@@ -355,6 +377,7 @@ def _nvcc_arch_flags(self, cuda_version: str, cuda_arch_list: AutoArch, detected |
355 | 377 | 'Orin': (['8.7'], []), |
356 | 378 | 'Lovelace': (['8.9'], ['8.9']), |
357 | 379 | 'Hopper': (['9.0'], ['9.0']), |
| 380 | + 'Blackwell': (['10.0'], ['10.0']), |
358 | 381 | }.get(arch_name, (None, None)) |
359 | 382 |
|
360 | 383 | if arch_bin is None: |
|
0 commit comments