Skip to content

Commit 6f0d271

Browse files
authored
CUTLASS v1.0
CUTLASS v1.0 released.
2 parents 8437724 + 923dfb4 commit 6f0d271

File tree

1,830 files changed

+308985
-11159
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,830 files changed

+308985
-11159
lines changed

CMake/bin2hex.cmake

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# FILE_TO_C_STRING(FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED)
#
# A small utility function which generates the C source text of a byte array
# holding the contents of an input file.
#
#   FILENAME         path of the file whose bytes are embedded
#   VARIABLE_NAME    identifier of the generated `static char const` array
#   OUTPUT_STRING    name of the caller-scope variable that receives the text
#   ZERO_TERMINATED  expanded and evaluated as an if() condition; when it is
#                    true a trailing 0x00 byte is appended to the data
function(FILE_TO_C_STRING FILENAME VARIABLE_NAME OUTPUT_STRING ZERO_TERMINATED)
  # Read the whole file as one string of hex digits, two digits per byte.
  file(READ "${FILENAME}" HEX_INPUT HEX)

  # Intentionally kept as ${...}: the text of the argument is what if() sees,
  # so existing callers keep exactly the behaviour they had before.
  if (${ZERO_TERMINATED})
    string(APPEND HEX_INPUT "00")
  endif()

  # Break the digit stream after every four digits (two bytes per line), then
  # turn each digit pair into a "0xNN," initializer.
  # The inputs are quoted so that an empty file still supplies an (empty)
  # argument instead of making string() fail with too few arguments.
  string(REGEX REPLACE "(....)" "\\1\n" HEX_OUTPUT "${HEX_INPUT}")
  string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," HEX_OUTPUT "${HEX_OUTPUT}")

  set(HEX_OUTPUT "static char const ${VARIABLE_NAME}[] = {\n ${HEX_OUTPUT}\n};\n")

  # Hand the generated text back to the caller.
  set(${OUTPUT_STRING} "${HEX_OUTPUT}" PARENT_SCOPE)
endfunction()
15+
16+
# Script body: wraps the contents of FILE_IN in a C++ header written to
# FILE_OUT, exposing the bytes as cutlass::nvrtc::<VARIABLE_NAME>.
# FILE_IN, FILE_OUT and VARIABLE_NAME are expected to be defined by whoever
# runs this script (presumably via -D definitions; confirm against the caller).
foreach(REQUIRED_INPUT FILE_IN FILE_OUT VARIABLE_NAME)
  string(LENGTH "${${REQUIRED_INPUT}}" REQUIRED_INPUT_LENGTH)
  if (REQUIRED_INPUT_LENGTH EQUAL 0)
    # Fail with a clear message rather than an obscure error further down.
    message(FATAL_ERROR "bin2hex: ${REQUIRED_INPUT} must be set to a non-empty value")
  endif()
endforeach()

message("Create header file for ${FILE_IN}")
message("Create header file for ${FILE_OUT}")

# The literal ZERO_TERMINATED is passed as the flag argument; inside the helper
# it is evaluated as an if() condition.
file_to_c_string("${FILE_IN}" "${VARIABLE_NAME}" OUTPUT_STRING ZERO_TERMINATED)

# Assemble the header: include guard, namespaces, then the generated array.
set(RESULT "#pragma once\n")
string(APPEND RESULT "namespace cutlass {\n")
string(APPEND RESULT "namespace nvrtc {\n")
string(APPEND RESULT "${OUTPUT_STRING}")
string(APPEND RESULT "} // namespace nvrtc\n")
string(APPEND RESULT "} // namespace cutlass\n")
file(WRITE "${FILE_OUT}" "${RESULT}")

CMakeLists.txt

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
# Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without modification, are permitted
4+
# provided that the following conditions are met:
5+
# * Redistributions of source code must retain the above copyright notice, this list of
6+
# conditions and the following disclaimer.
7+
# * Redistributions in binary form must reproduce the above copyright notice, this list of
8+
# conditions and the following disclaimer in the documentation and/or other materials
9+
# provided with the distribution.
10+
# * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
11+
# to endorse or promote products derived from this software without specific prior written
12+
# permission.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
15+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
16+
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
17+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
18+
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
19+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
20+
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
21+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22+
23+
# string(APPEND ...) is used throughout this file and only exists in
# CMake >= 3.4, so that is the real minimum version.
cmake_minimum_required(VERSION 3.4.0)

# Languages handed to project(); CUDA is added when native support is used.
set(CUTLASS_LANGUAGES CXX)

# CMake 3.9.0 has native support for CUDA without the need of the CUDA package. Use it!
if(WIN32 AND NOT CMAKE_VERSION VERSION_LESS "3.9.0")
  list(APPEND CUTLASS_LANGUAGES CUDA)
  set(CUTLASS_NATIVE_CUDA TRUE)

  # With native CUDA support a plain add_executable() handles CUDA sources.
  macro(cutlass_add_executable)
    add_executable(${ARGN})
  endmacro()
else()
  # FindCUDA fails to detect VS 2017 due to a changed directory format of the toolkits.
  # For this configuration we need CMake >= 3.9.0 to use the native CUDA support.
  # NOTE(review): MSVC_VERSION is normally populated once a language has been
  # enabled, and project() is only called further down - confirm that this
  # check can actually trigger at this point.
  if (WIN32 AND MSVC_VERSION GREATER 1800)
    message(FATAL_ERROR "Please upgrade CMake to version >= 3.9.0 to support Visual Studio 2017 or higher")
  endif()

  # Fall back to the FindCUDA version to create an executable with CUDA files
  macro(cutlass_add_executable)
    cuda_add_executable(${ARGN})
  endmacro()
endif()

project(CUTLASS ${CUTLASS_LANGUAGES})
49+
50+
# Refuse to configure unless pointers are 8 bytes wide.
if(NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
  message(FATAL_ERROR "CUTLASS requires a 64-bit compiler!")
endif()

# External tools: the CUDA package, and Doxygen (searched for quietly).
find_package(CUDA)
find_package(Doxygen QUIET)
57+
58+
# By default we want to build in Release mode to ensure that we're getting best performance.
# The default is only applied when the user has not chosen a build type and the
# generator is single-config; multi-config generators populate
# CMAKE_CONFIGURATION_TYPES and choose the configuration at build time.
if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build level" FORCE)
  # Offer Debug and Release as the selectable values for the cache entry
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
endif()
64+
65+
# Windows-only configuration
if (WIN32)
  # We link against the shared (DLL) runtime, so Google Test has to do the same.
  set(gtest_force_shared_crt ON CACHE BOOL "Use shared (DLL) run-time lib even when Google Test is built as static lib" FORCE)

  # Host compiler: more warnings, and warnings are treated as errors
  string(APPEND NVCC_FLAGS " -Xcompiler /W3" " -Xcompiler /WX")

  # Host compiler: strict floating point, so that excess x86 precision does not
  # lead to results being labeled incorrectly
  string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict")

  # Optional verbose NVCC output
  if (${CUTLASS_NVCC_VERBOSE})
    string(APPEND NVCC_FLAGS " -v")
  endif()
endif()
82+
83+
# User-tunable CUDA settings
set(CUTLASS_NVCC_ARCHS "50;60;61;70" CACHE STRING "The SM architectures to build code for.")
set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.")

# Emit one -gencode entry per requested SM architecture
foreach(ARCH ${CUTLASS_NVCC_ARCHS})
  string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()

# Keep NVCC's intermediate files when requested
if (CUTLASS_NVCC_KEEP)
  string(APPEND NVCC_FLAGS " -keep")
endif()

# -lineinfo goes into the release-only flags for native CUDA on Windows and
# into the common flags everywhere else
if (NOT (WIN32 AND CUTLASS_NATIVE_CUDA))
  string(APPEND NVCC_FLAGS " -lineinfo")
else()
  string(APPEND NVCC_FLAGS_RELEASE " -lineinfo")
endif()

# Extra host-compiler conversion warnings on UNIX platforms
if (UNIX)
  string(APPEND NVCC_FLAGS " -Xcompiler -Wconversion")
endif()

# Per-configuration flags: debug info for Debug; optimization plus NDEBUG
# (which disables assertions) for Release
string(APPEND NVCC_FLAGS_DEBUG " -g")
string(APPEND NVCC_FLAGS_RELEASE " -O3" " -DNDEBUG")

# Publish the collected flags through the variables of whichever CUDA
# mechanism is in use
if (NOT CUTLASS_NATIVE_CUDA)
  # FindCUDA package
  set(CUDA_NVCC_FLAGS ${NVCC_FLAGS})
  set(CUDA_NVCC_FLAGS_DEBUG ${NVCC_FLAGS_DEBUG})
  set(CUDA_NVCC_FLAGS_RELEASE ${NVCC_FLAGS_RELEASE})
else()
  # Native CUDA language support
  set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS}")
  set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}")
  set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_RELEASE}")
endif()
121+
122+
#
# Everything in this section should eventually move into cutlass/CMakeLists.txt
#

# Collect the CUTLASS headers per directory, relative to this source directory.
# (A static list might be preferable to globbing.)
file(GLOB CUTLASS_GEMM RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/gemm/*.h)
file(GLOB CUTLASS_UTIL RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/util/*.h)
file(GLOB CUTLASS_DEVICE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/device/*.h)
file(GLOB CUTLASS_CORE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/*.h)

# Mirror the directory layout in IDE source groups
source_group("cutlass\\gemm" FILES ${CUTLASS_GEMM})
source_group("cutlass\\util" FILES ${CUTLASS_UTIL})
source_group("cutlass\\device" FILES ${CUTLASS_DEVICE})
source_group("cutlass" FILES ${CUTLASS_CORE})

# Interface library that carries the headers and the include path
add_library(CUTLASS INTERFACE)
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
target_sources(CUTLASS INTERFACE ${CUTLASS_GEMM} ${CUTLASS_UTIL} ${CUTLASS_DEVICE} ${CUTLASS_CORE})

target_include_directories(CUTLASS INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})

# Custom target whose only purpose is to make the CUTLASS sources visible in an IDE
add_custom_target(cutlass_ide
  SOURCES ${CUTLASS_GEMM} ${CUTLASS_UTIL} ${CUTLASS_DEVICE} ${CUTLASS_CORE}
)
155+
# Documentation target, only created when Doxygen was found
if (DOXYGEN_FOUND)
  # Graph generation needs dot: without it the option is forced OFF, with it
  # the option defaults to ON but remains under the user's control
  if (NOT DOXYGEN_DOT_EXECUTABLE)
    set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE)
  else()
    set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.")
  endif()

  # Translate the option into the YES/NO value handed to Doxygen
  set(HAVE_DOT "NO")
  if (CUTLASS_ENABLE_DOXYGEN_DOT)
    set(HAVE_DOT "YES")
  endif()

  # Run Doxygen from the source directory with DOT_PATH and HAVE_DOT supplied
  # through the environment
  add_custom_target(cutlass_docs
    ${CMAKE_COMMAND} -E env "DOT_PATH=${DOXYGEN_DOT_EXECUTABLE}" "HAVE_DOT=${HAVE_DOT}"
    ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    VERBATIM
  )
endif()

# Sub-projects (the gemm example is currently not part of the build)
#add_subdirectory(examples/gemm)
add_subdirectory(tools)

Doxyfile

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ PROJECT_LOGO =
5858
# entered, it will be relative to the location where doxygen was started. If
5959
# left blank the current directory will be used.
6060

61-
OUTPUT_DIRECTORY = doxygen
61+
OUTPUT_DIRECTORY = docs
6262

6363
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 4096 sub-
6464
# directories (in 2 levels) under the output directory of each output format and
@@ -218,7 +218,8 @@ TAB_SIZE = 4
218218
# "Side Effects:". You can put \n's in the value part of an alias to insert
219219
# newlines.
220220

221-
ALIASES =
221+
#ALIASES += "concept{1}=@ingroup \1\n@par Implemented concepts:\n@ref \1"
222+
ALIASES += "concept{1}=@ingroup \1"
222223

223224
# This tag can be used to specify a number of word-keyword mappings (TCL only).
224225
# A mapping has the form "name=value". For example adding "class=itcl::class"
@@ -396,7 +397,7 @@ LOOKUP_CACHE_SIZE = 0
396397
# normally produced when WARNINGS is set to YES.
397398
# The default value is: NO.
398399

399-
EXTRACT_ALL = NO
400+
EXTRACT_ALL = YES
400401

401402
# If the EXTRACT_PRIVATE tag is set to YES all private members of a class will
402403
# be included in the documentation.
@@ -733,7 +734,7 @@ WARN_LOGFILE =
733734
# spaces.
734735
# Note: If this tag is empty the current directory is searched.
735736

736-
INPUT = cutlass cutlass/gemm cutlass/util
737+
INPUT = cutlass
737738

738739
# This tag can be used to specify the character encoding of the source files
739740
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
@@ -759,7 +760,7 @@ FILE_PATTERNS =
759760
# be searched for input files as well.
760761
# The default value is: NO.
761762

762-
RECURSIVE = NO
763+
RECURSIVE = YES
763764

764765
# The EXCLUDE tag can be used to specify files and/or directories that should be
765766
# excluded from the INPUT source files. This way you can easily exclude a
@@ -2032,7 +2033,7 @@ HIDE_UNDOC_RELATIONS = YES
20322033
# set to NO
20332034
# The default value is: NO.
20342035

2035-
HAVE_DOT = NO
2036+
HAVE_DOT = $(HAVE_DOT)
20362037

20372038
# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
20382039
# to run in parallel. When set to 0 doxygen will base this on the number of
@@ -2204,7 +2205,7 @@ INTERACTIVE_SVG = NO
22042205
# found. If left blank, it is assumed the dot tool can be found in the path.
22052206
# This tag requires that the tag HAVE_DOT is set to YES.
22062207

2207-
DOT_PATH =
2208+
DOT_PATH = $(DOT_PATH)
22082209

22092210
# The DOTFILE_DIRS tag can be used to specify one or more directories that
22102211
# contain dot files that are included in the documentation (see the \dotfile

0 commit comments

Comments
 (0)