@@ -41,51 +41,64 @@ endif
4141
# Translate the compute capabilities requested in SM_ARCH into compiler flags.
#
# For every capability number that occurs as a substring of $(SM_ARCH):
#   SM_TARGETS       gains the nvcc  -gencode=...      option, and
#   CLANG_SM_TARGETS gains the clang --cuda-gpu-arch=  option.
#
# Variable references must not contain whitespace: $(SM_ARCH ) would name a
# different (undefined) variable and silently disable every branch below.
ifeq (70, $(findstring 70, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_70,code=\"sm_70,compute_70\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_70
endif
ifeq (62, $(findstring 62, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_62,code=\"sm_62,compute_62\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_62
endif
ifeq (61, $(findstring 61, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_61,code=\"sm_61,compute_61\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_61
endif
ifeq (60, $(findstring 60, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_60,code=\"sm_60,compute_60\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_60
endif
ifeq (52, $(findstring 52, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_52,code=\"sm_52,compute_52\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_52
endif
ifeq (37, $(findstring 37, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_37,code=\"sm_37,compute_37\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_37
endif
ifeq (35, $(findstring 35, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_35,code=\"sm_35,compute_35\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_35
endif
ifeq (30, $(findstring 30, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_30
endif
# The nvcc flags pair sm_21 machine code with the compute_20 virtual
# architecture; the clang flag names sm_21 directly.
ifeq (21, $(findstring 21, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_21
endif
ifeq (20, $(findstring 20, $(SM_ARCH)))
    SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\"
    CLANG_SM_TARGETS += --cuda-gpu-arch=sm_20
endif
7282
7383
# [verbose=<0|1>] Verbose toolchain output from the compiler driver.
# When verbose=1, -v is appended to both the nvcc flags and the clang flags.
# $(strip ...) tolerates stray whitespace around the user-supplied value.
ifeq ($(strip $(verbose)),1)
    NVCCFLAGS += -v
    CLANG_CFLAGS += -v
endif
7889
7990
# [keep=<0|1>] Keep intermediate compilation artifacts.
# When keep=1, nvcc receives -keep and clang receives --save-temps.
# $(strip ...) tolerates stray whitespace around the user-supplied value.
ifeq ($(strip $(keep)),1)
    NVCCFLAGS += -keep
    CLANG_CFLAGS += --save-temps
endif
8496
8597
# [debug=<0|1>] Generate debug mode code.
# When debug=1, nvcc receives -G and clang receives --cuda-noopt-device-debug.
# $(strip ...) tolerates stray whitespace around the user-supplied value.
ifeq ($(strip $(debug)),1)
    NVCCFLAGS += -G
    CLANG_CFLAGS += --cuda-noopt-device-debug
endif
90103
91104
@@ -107,7 +120,7 @@ OSUPPER := $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
107120
# Default flags for every build: -O3 optimisation, plus -v forwarded to ptxas
# so that kernel properties (regs, smem, cmem, etc.) are reported.  The clang
# flag set mirrors the nvcc one, using -Xcuda-ptxas to forward the option.
NVCCFLAGS += -O3
NVCCFLAGS += -Xptxas -v
CLANG_CFLAGS += -O3
CLANG_CFLAGS += -Xcuda-ptxas -v
111124ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER ) ) )
112125 # For MSVC
113126
@@ -139,15 +152,30 @@ else
139152
140153endif
141154
# [compiler=clang] Enables compilation with clang instead of nvcc.
#
# In clang mode the NVCC, NVCCFLAGS and SM_TARGETS variables are redirected to
# their clang counterparts, so rules written in terms of those variables
# invoke clang++.  BIN_SUFFIX (the suffix appended to each binary) records
# which compiler produced the binary.

ifeq ($(strip $(compiler)),clang)
    # NVCC_VERSION is used as the proxy for the CUDA version.
    BIN_SUFFIX := sm$(SM_ARCH)_clang_cuda_$(NVCC_VERSION)

    # Clang needs a few extra flags to point it at the CUDA SDK and to link
    # the binaries against the CUDA runtime.  The SDK root is taken to be the
    # parent of the directory holding the nvcc found on PATH.  Simple (:=)
    # assignment runs `which nvcc` once instead of on every expansion.
    CUDA_BASE := $(realpath $(join $(dir $(shell which nvcc)), ..))
    CLANG_CFLAGS += --cuda-path=$(CUDA_BASE)
    LIBINC += -L$(CUDA_BASE)/lib64 -Wl,-rpath=$(CUDA_BASE)/lib64
    LIBS += -lcudart

    # Replace NVCC and its options with clang++.  These stay recursive (=) so
    # that later additions to the CLANG_* variables are still picked up.
    NVCC = clang++
    NVCCFLAGS = $(CLANG_CFLAGS)
    SM_TARGETS = $(CLANG_SM_TARGETS)
else
    # Suffix to append to each binary
    BIN_SUFFIX := sm$(SM_ARCH)_nvcc_$(NVCC_VERSION)
endif
145175
146176
#-------------------------------------------------------------------------------
# Function for computing dependency lists
#
# Usage: $(call rwildcard,<dir/>,<pattern>)
#   $1  directory prefix to search, including its trailing slash
#   $2  file pattern using '*' as the wildcard (e.g. *.cuh)
#
# Expands to every path beneath $1, at any depth, that matches $2.  Each entry
# found by $(wildcard $1*) is both descended into (recursive call with a '/'
# appended) and tested against the pattern.  The '*' in $2 is rewritten to '%'
# because $(filter) uses '%' as its wildcard.  No whitespace may appear inside
# the $(subst ...) arguments: it would become part of the text being replaced.
#-------------------------------------------------------------------------------

rwildcard = $(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
152-
153-
0 commit comments