forked from NVIDIA/DALI
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCUDA_utils.cmake
195 lines (175 loc) · 7.73 KB
/
CUDA_utils.cmake
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Derive the path of the CUDA toolkit from the location of CMAKE_CUDA_COMPILER.
# Needed when compiling with Clang only.
#
# "/../.." is appended to the compiler path and the result is resolved to an
# absolute path, i.e. the toolkit root is taken to be two levels above the
# compiler binary (<root>/bin/<compiler> -> <root>).
#
# @param TOOLKIT_PATH name of the variable that receives the toolkit path
#                     in the caller's scope
# @return TOOLKIT_PATH
function(CUDA_get_toolkit_from_compiler TOOLKIT_PATH)
  get_filename_component(
    cuda_root_dir
    "${CMAKE_CUDA_COMPILER}/../.."
    ABSOLUTE
  )
  set(${TOOLKIT_PATH} "${cuda_root_dir}" PARENT_SCOPE)
endfunction()
# When compiling CUDA with Clang only (DALI_CLANG_ONLY=ON), we need to change the
# language properties of .cu files to allow them to use the CXX compiler (which will be Clang).
# Setting that property has narrow scope of current CMakeLists.txt, so we do this at the point
# just before creating a target.
# Clang will compile files as CUDA based on their extension.
#
# Does nothing unless DALI_CLANG_ONLY is true.
#
# @param SOURCES list of source files; may be passed as one quoted list
#                ("${srcs}") or unquoted (${srcs}) - any extra positional
#                arguments are treated as further source files
function(adjust_source_file_language_property SOURCES)
  if(NOT DALI_CLANG_ONLY)
    return()
  endif()

  # An unquoted list arrives as SOURCES (first element) + ARGN (the rest);
  # merge them so that no file is silently skipped.
  set(source_files ${SOURCES} ${ARGN})

  foreach(source_file IN LISTS source_files)
    # The backslash is doubled on purpose: inside a quoted CMake argument "\."
    # collapses to a plain ".", which the regex engine would treat as
    # "any character". "\\." reaches the regex engine as "\." - a literal dot.
    if(source_file MATCHES "\\.cu$")
      set_source_files_properties("${source_file}" PROPERTIES LANGUAGE CXX)
    endif()
  endforeach()
endfunction()
# List of currently used arch values, selected from the value of ARCH.
# ARCH is not defined in this file (presumably set by the including project -
# TODO confirm). It is quoted below so that an empty or undefined ARCH falls
# through to the generic list instead of producing a malformed if() expression.
if("${ARCH}" MATCHES "aarch64-")
  # aarch64-linux and aarch64-qnx
  set(CUDA_known_archs "53" "62" "72" "75")
elseif("${ARCH}" MATCHES "aarch64")
  # aarch64 SBSA, only >=Volta
  set(CUDA_known_archs "70" "75" "80" "86")
else()
  set(CUDA_known_archs "35" "50" "52" "60" "61" "70" "75" "80" "86")
endif()

# User-overridable list of target architectures; defaults to the known list above.
set(CUDA_TARGET_ARCHS ${CUDA_known_archs} CACHE STRING "List of target CUDA architectures")

# An explicitly empty cache value is replaced by the default list. FORCE is
# required here because the cache entry already exists at this point.
if("${CUDA_TARGET_ARCHS}" STREQUAL "")
  message("CUDA_TARGET_ARCHS cannot be empty, setting to the default")
  set(CUDA_TARGET_ARCHS ${CUDA_known_archs} CACHE STRING "List of target CUDA architectures" FORCE)
endif()
# Find out whether passing `flags` to the CUDA compiler produces success or failure.
# Unix only (the probe uses /dev/null as the input file).
#
# Equivalent to dry-running preprocessing on /dev/null as .cu file
# and checking the exit code
# $ nvcc ${flags} --dryrun -E -x cu /dev/null
# or
# $ clang++ ${flags} -E -x cuda /dev/null
#
# The clang form is used when `compiler` matches the regex "clang".
#
# @param out_status TRUE iff exit code is 0, FALSE otherwise (also FALSE when
#                   `compiler` is empty or cannot be started)
# @param compiler   compiler executable to run
# @param flags      flags to check
# @return out_status
function(CUDA_check_cudacc_flag out_status compiler flags)
  # Without a compiler there is nothing to probe. Bail out explicitly:
  # otherwise the first flag would end up being executed as the program.
  if("${compiler}" STREQUAL "")
    set(${out_status} FALSE PARENT_SCOPE)
    return()
  endif()

  # Quoted so that the comparison is always well-formed.
  if("${compiler}" MATCHES "clang")
    set(preprocess_empty_cu_file "-E" "-x" "cuda" "/dev/null")
  else()
    set(preprocess_empty_cu_file "--dryrun" "-E" "-x" "cu" "/dev/null")
  endif()
  set(cudacc_command ${flags} ${preprocess_empty_cu_file})

  # Run the compiler and check the exit status
  execute_process(
    COMMAND ${compiler} ${cudacc_command}
    RESULT_VARIABLE tmp_out_status
    OUTPUT_QUIET
    ERROR_QUIET
  )

  # When the process cannot be started, the result variable holds an error
  # string rather than a number; that compares unequal to 0 and yields FALSE.
  if(tmp_out_status EQUAL 0)
    set(${out_status} TRUE PARENT_SCOPE)
  else()
    set(${out_status} FALSE PARENT_SCOPE)
  endif()
endfunction()
# Given the list of arch values, check which are supported by the specified compiler.
#
# Each value is probed through CUDA_check_cudacc_flag using
# "--cuda-gpu-arch=sm_XX" when `compiler` matches "clang" and "-arch=sm_XX" otherwise.
#
# @param out_arch_values_allowed  List of arch values supported by the specified compiler
#                                 (empty when none is supported)
# @param compiler                 What compiler to use for this check
# @param arch_values_to_check     List of values to be checked against the specified compiler
#                                 for example: 60;61;70;75
# @return out_arch_values_allowed
function(CUDA_find_supported_arch_values out_arch_values_allowed compiler arch_values_to_check)
  # allow the user to pass the list like a normal variable
  set(arch_list ${arch_values_to_check} ${ARGN})

  # Start from a known-empty result. A function can read variables of the
  # calling scope, so an uninitialised accumulator would pick up a same-named
  # variable of the caller.
  set(supported_archs "")

  # The flag spelling depends only on the compiler, so decide it once.
  if("${compiler}" MATCHES "clang")
    set(arch_flag_prefix "--cuda-gpu-arch=sm_")
  else()
    set(arch_flag_prefix "-arch=sm_")
  endif()

  foreach(arch IN LISTS arch_list)
    CUDA_check_cudacc_flag(supported "${compiler}" "${arch_flag_prefix}${arch}")
    if(supported)
      list(APPEND supported_archs "${arch}")
    endif()
  endforeach()

  set(${out_arch_values_allowed} "${supported_archs}" PARENT_SCOPE)
endfunction()
# Generate -gencode arch=compute_XX,code=sm_XX or --cuda-gpu-arch=sm_XX for list of supported
# arch values based on the specified compiler.
# List should be sorted in increasing order.
#
# If nvcc is used, the last arch value will be repeated as -gencode arch=compute_XX,code=compute_XX
# to ensure the generation of PTX for most recent virtual architecture
# and maintain forward compatibility.
#
# Every generated flag is preceded by a single space, so a non-empty result
# starts with a space. An empty list of arch values produces a warning and an
# empty string.
#
# @param out_args_string  output string containing appropriate CMAKE_CUDA_FLAGS/CMAKE_CXX_FLAGS
# @param compiler         What compiler to generate flags for (clang-style flags are
#                         produced when it matches "clang")
# @param arch_values      list of arch values to use
# @return out_args_string
function(CUDA_get_gencode_args out_args_string compiler arch_values)
  # allow the user to pass the list like a normal variable
  set(arch_list "")
  list(APPEND arch_list ${arch_values} ${ARGN})

  # Nothing to generate. Without this guard list(GET ... -1) below would either
  # fail or yield "NOTFOUND", producing bogus "compute_NOTFOUND" flags.
  if("${arch_list}" STREQUAL "")
    message(WARNING "CUDA_get_gencode_args: empty list of arch values, no flags generated")
    set(${out_args_string} "" PARENT_SCOPE)
    return()
  endif()

  if("${compiler}" MATCHES "clang")
    set(use_clang_flags TRUE)
  else()
    set(use_clang_flags FALSE)
  endif()

  set(out "")
  foreach(arch IN LISTS arch_list)
    if(use_clang_flags)
      set(out "${out} --cuda-gpu-arch=sm_${arch}")
    else()
      set(out "${out} -gencode arch=compute_${arch},code=sm_${arch}")
    endif()
  endforeach()

  if(NOT use_clang_flags)
    # Repeat the last one as to ensure the generation of PTX for most
    # recent virtual architecture for forward compatibility
    list(GET arch_list -1 last_arch)
    set(out "${out} -gencode arch=compute_${last_arch},code=compute_${last_arch}")
  endif()

  set(${out_args_string} "${out}" PARENT_SCOPE)
endfunction()
# Generate list of xx-real for every specified supported architecture.
# List should be sorted in increasing order.
#
# The last one will also be repeated as xx-virtual to ensure the generation of PTX for most recent
# virtual architecture and maintain forward compatibility.
#
# An empty list of arch values produces a warning and an empty list.
#
# @param out_args_list  name of the variable receiving the resulting list,
#                       for example 60-real;70-real;70-virtual
# @param arch_values    list of arch values to use
# @return out_args_list
function(CUDA_get_cmake_cuda_archs out_args_list arch_values)
  # allow the user to pass the list like a normal variable
  set(arch_list "")
  list(APPEND arch_list ${arch_values} ${ARGN})

  # Nothing to generate. Without this guard list(GET ... -1) below would either
  # fail or yield "NOTFOUND", producing a bogus "NOTFOUND-virtual" entry.
  if("${arch_list}" STREQUAL "")
    message(WARNING "CUDA_get_cmake_cuda_archs: empty list of arch values, nothing generated")
    set(${out_args_list} "" PARENT_SCOPE)
    return()
  endif()

  set(out "")
  foreach(arch IN LISTS arch_list)
    list(APPEND out "${arch}-real")
  endforeach()

  # Repeat the last one as to ensure the generation of PTX for most
  # recent virtual architecture for forward compatibility
  list(GET arch_list -1 last_arch)
  list(APPEND out "${last_arch}-virtual")

  set(${out_args_list} "${out}" PARENT_SCOPE)
endfunction()
# Locate a library, additionally searching the directories listed in
# CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES and their `lib` / `lib64` sub-directories.
#
# This is a thin wrapper over find_library(): the result variable is created by
# find_library itself, and since NO_DEFAULT_PATH is not given, the usual default
# search locations apply as well.
#
# @param out_path  name of the variable receiving the location of the library
# @param lib_name  name of the library to look for
# @return out_path
function(CUDA_find_library out_path lib_name)
  find_library(
    ${out_path}
    ${lib_name}
    PATHS
      ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}
    PATH_SUFFIXES
      lib
      lib64
  )
endfunction()
# Locate a stub library, additionally searching the directories listed in
# CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES and their `lib/stubs` / `lib64/stubs`
# sub-directories.
#
# This is a thin wrapper over find_library(): the result variable is created by
# find_library itself, and since NO_DEFAULT_PATH is not given, the usual default
# search locations apply as well.
#
# @param out_path  name of the variable receiving the location of the library
# @param lib_name  name of the library to look for
# @return out_path
function(CUDA_find_library_stub out_path lib_name)
  find_library(
    ${out_path}
    ${lib_name}
    PATHS
      ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}
    PATH_SUFFIXES
      lib/stubs
      lib64/stubs
  )
endfunction()
# Remove the CUDA toolkit include directories from a list of include directories.
#
# The entries removed are every directory listed in
# CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES plus "<toolkit root>/include", where
# the toolkit root is CMAKE_CUDA_TOOLKIT_ROOT when that variable holds a usable
# value, and is otherwise derived from the compiler location via
# CUDA_get_toolkit_from_compiler.
#
# @param include_dirs  NAME of the list variable to filter; it is updated in the
#                      caller's scope. NOTE: the caller's variable must not itself
#                      be called `include_dirs`, as it would be shadowed by this
#                      parameter.
# @return include_dirs
function(CUDA_remove_toolkit_include_dirs include_dirs)
  # Test the variable by name. Expanding it (`NOT ${...}`) would make if()
  # interpret the path text as a variable name, which is true for any real
  # path, so the explicitly configured root would never be used.
  if(CMAKE_CUDA_TOOLKIT_ROOT)
    set(CUDA_TOOLKIT_PATH_VAR "${CMAKE_CUDA_TOOLKIT_ROOT}")
  else()
    CUDA_get_toolkit_from_compiler(CUDA_TOOLKIT_PATH_VAR)
  endif()

  # Work on a local copy of the caller's list.
  set(kept_dirs "${${include_dirs}}")

  if(NOT "${kept_dirs}" STREQUAL "")
    # CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES is expanded unquoted on purpose:
    # REMOVE_ITEM compares each argument against the list elements, so a quoted
    # multi-entry list ("a;b") would be treated as one item and match nothing.
    list(REMOVE_ITEM kept_dirs
      ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
      "${CUDA_TOOLKIT_PATH_VAR}/include"
    )
  endif()

  set(${include_dirs} "${kept_dirs}" PARENT_SCOPE)
endfunction()
# Re-register the CUDA toolkit include directories of the current source
# directory as SYSTEM include directories.
#
# Steps:
#   1. read the INCLUDE_DIRECTORIES property of CMAKE_CURRENT_SOURCE_DIR,
#   2. strip the toolkit directories from it (CUDA_remove_toolkit_include_dirs)
#      and write the filtered list back,
#   3. add CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES again through
#      include_directories(SYSTEM ...) - which appends unless the project has
#      configured include_directories to prepend,
#   4. print the resulting property value.
function(CUDA_move_toolkit_include_dirs_to_end)
  get_property(
    current_include_dirs
    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    PROPERTY INCLUDE_DIRECTORIES
  )
  CUDA_remove_toolkit_include_dirs(current_include_dirs)
  set_property(
    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    PROPERTY INCLUDE_DIRECTORIES ${current_include_dirs}
  )

  include_directories(SYSTEM "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")

  # Read the property again so that the message reflects the final state.
  get_property(
    current_include_dirs
    DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    PROPERTY INCLUDE_DIRECTORIES
  )
  message("\nInclude directories = ${current_include_dirs}\n")
endfunction()