Skip to content

Commit f338998

Browse files
author
Erik Zenker
committed
Merge pull request #49 from ComputationalRadiationPhysics/dev
Merging dev to master for release 1.1
2 parents 08ddeda + 88d45f3 commit f338998

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+5552
-1149
lines changed

CMakeLists.txt

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
# cmake_minimum_required() has to come before project(): it establishes the
# policy settings that project() and everything after it run under.
cmake_minimum_required(VERSION 2.8.10.1)
project(HASEonGPU)

# helper for libs and packages: search prefixes, partly taken from the
# environment (read once, at configure time)
set(CMAKE_PREFIX_PATH "/usr/lib/x86_64-linux-gnu/"
    "$ENV{MPI_ROOT}" "$ENV{CUDA_ROOT}" "$ENV{BOOST_ROOT}")

# modules shipped with the project are searched before any previously
# configured module path
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/utils/cmake/modules/" ${CMAKE_MODULE_PATH})
9+
10+
###############################################################################
11+
# CUDA
12+
###############################################################################
13+
# CUDA 5.0 is the hard minimum; configuration aborts if it is not found.
find_package(CUDA 5.0 REQUIRED)

# Toolkits older than 5.5 are accepted, but the user is told to run them
# with a newer driver.
if(CUDA_VERSION VERSION_LESS 5.5)
  message(STATUS
    "CUDA Toolkit < 5.5 detected. We strongly recommend to still "
    "use CUDA 5.5+ drivers (319.82 or higher)!")
endif()
19+
20+
# Target GPU architecture, user-overridable through the cache.
set(CUDA_ARCH sm_20 CACHE STRING "Set GPU architecture")

# Reject every SM 1.x architecture (sm_10 .. sm_13).
# A chain of string(COMPARE EQUAL ... IS_CUDA_ARCH_UNSUPPORTED) calls cannot be
# used for this: each call overwrites the result of the previous one, so only
# the last compared value would actually be rejected. One anchored regex covers
# all four values; quoting keeps an empty CUDA_ARCH from being a syntax error.
if("${CUDA_ARCH}" MATCHES "^sm_1[0-3]$")
  set(IS_CUDA_ARCH_UNSUPPORTED TRUE)
else()
  set(IS_CUDA_ARCH_UNSUPPORTED FALSE)
endif()

if(IS_CUDA_ARCH_UNSUPPORTED)
  message(FATAL_ERROR "Unsupported CUDA architecture ${CUDA_ARCH} specified. "
                      "SM 2.0 or higher is required.")
endif()
30+
31+
# --- user-tunable nvcc settings (cache entries) ------------------------------
set(CUDA_FTZ "--ftz=false" CACHE STRING "Set flush to zero for GPU")

set(CUDA_MATH --use_fast_math CACHE STRING "Enable fast-math" )
option(CUDA_SHOW_REGISTER "Show kernel registers and create PTX" OFF)
option(CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps (folder: nvcc_tmp)" OFF)
option(CUDA_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF)

# --- assemble CUDA_NVCC_FLAGS -------------------------------------------------
# Line information for the debugging tools; this also switches
# CUDA_KEEP_FILES on in the cache.
if(CUDA_SHOW_CODELINES)
  list(APPEND CUDA_NVCC_FLAGS --source-in-ptx -Xcompiler -rdynamic -lineinfo)
  set(CUDA_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE)
endif()

# Architecture, math mode and flush-to-zero setting are always passed.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${nvcc_flags} -arch=${CUDA_ARCH} ${CUDA_MATH} ${CUDA_FTZ})

if(CUDA_SHOW_REGISTER)
  list(APPEND CUDA_NVCC_FLAGS -Xptxas=-v)
endif()

# Intermediate files are collected in <build dir>/nvcc_tmp.
if(CUDA_KEEP_FILES)
  make_directory("${PROJECT_BINARY_DIR}/nvcc_tmp")
  list(APPEND CUDA_NVCC_FLAGS --keep --keep-dir "${PROJECT_BINARY_DIR}/nvcc_tmp")
endif()

# save the current configuration to be used with the linker.
# any further additions to CUDA_NVCC_FLAGS will NOT be passed to
# the linker. See the modified FindCUDA.cmake module
set(CUDA_NVCC_LINKER_FLAGS ${CUDA_NVCC_FLAGS})
57+
58+
###############################################################################
59+
# Boost
60+
###############################################################################
61+
# Boost >= 1.48 with the two compiled components the project links against.
find_package(Boost 1.48.0 REQUIRED COMPONENTS program_options filesystem)
include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
list(APPEND LIBS ${Boost_LIBRARIES})

# nvcc + boost 1.55 work around
# This must NOT be passed to the nvcc linker, but only to the compiler
# (CUDA_NVCC_LINKER_FLAGS was already captured before this point).
if(Boost_VERSION EQUAL 105500)
  set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
endif()
70+
71+
72+
################################################################################
73+
# Find MPI
74+
################################################################################
75+
# MPI is mandatory; the C headers and libraries are always used.
find_package(MPI REQUIRED)
include_directories(SYSTEM ${MPI_C_INCLUDE_PATH})
list(APPEND LIBS ${MPI_C_LIBRARIES})

# bullxmpi fails if it can not find its c++ counter part
if(MPI_CXX_FOUND)
  list(APPEND LIBS ${MPI_CXX_LIBRARIES})
endif()
83+
84+
85+
################################################################################
86+
# Find PThreads
87+
################################################################################
88+
# Add whatever the platform needs for thread support to the link line.
find_package(Threads REQUIRED)
list(APPEND LIBS ${CMAKE_THREAD_LIBS_INIT})
90+
91+
92+
################################################################################
93+
# Compiler Flags
94+
################################################################################
95+
# GNU
96+
# Host compiler flags are appended to CMAKE_CXX_FLAGS one at a time, each
# preceded by a single space, in the order listed.
# GNU
if(CMAKE_COMPILER_IS_GNUCXX)
  foreach(hase_cxx_flag
      -O2
      -Wall
      -Wextra
      -Wno-unknown-pragmas
      -Wno-unused-parameter
      -Waddress
      -Warray-bounds
      -Wchar-subscripts
      -Wcomment
      -Wformat
      -Wmain
      -Wmissing-braces
      -Wparentheses
      -Wreturn-type
      -Wsequence-point
      -Wsign-compare
      -Wstrict-aliasing
      -Wstrict-overflow=1
      -Wswitch
      -Wtrigraphs
      -Wuninitialized
      -Wunused-function
      -Wunused-label
      -Wunused-value
      -Wunused-variable
      -Wvolatile-register-var
      # new warning in gcc 4.8 (flag ignored in previous version)
      -Wno-unused-local-typedefs)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${hase_cxx_flag}")
  endforeach()
# ICC
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
  foreach(hase_cxx_flag
      -Wall
      -DBOOST_NO_VARIADIC_TEMPLATES
      -DBOOST_NO_CXX11_VARIADIC_TEMPLATES
      -DBOOST_NO_FENV_H)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${hase_cxx_flag}")
  endforeach()
# PGI
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minform=inform")
endif()
135+
136+
137+
################################################################################
138+
# Build type (debug, release)
139+
################################################################################
140+
# HASE_RELEASE selects between the two supported configurations. It sets
# CMAKE_BUILD_TYPE itself and extends the nvcc flags accordingly.
option(HASE_RELEASE "Build release version, disables all runtime asserts" OFF)
if(NOT HASE_RELEASE)
  message(STATUS "Debug version")

  set(CMAKE_BUILD_TYPE Debug)
  # debug information for nvcc (-g) and for the host compiler
  list(APPEND CUDA_NVCC_FLAGS -g "-Xcompiler=-g,-pthread")
else()
  message(STATUS "Release version")

  set(CMAKE_BUILD_TYPE Release)
  # NDEBUG is what disables the runtime asserts
  add_definitions(-DNDEBUG)
  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=-pthread")
endif()
153+
154+
155+
###############################################################################
156+
# Warning for Thrust-related Bug
157+
###############################################################################
158+
# warn_path_cuda(<PATH_VARIABLE>)
#
# Emits a configure-time WARNING when the environment variable named by
# PATH_VARIABLE contains the substring "cuda" (compared case-insensitively).
#
#   PATH_VARIABLE  name of an environment variable, e.g. "CPLUS_INCLUDE_PATH"
#
# The check is a plain substring search on the variable's value; an unset or
# empty variable never triggers the warning.
function(warn_path_cuda PATH_VARIABLE)
  string(TOLOWER "$ENV{${PATH_VARIABLE}}" path_value_lower)
  # string(FIND) stores -1 when the substring is absent
  string(FIND "${path_value_lower}" "cuda" cuda_position)
  if(NOT cuda_position EQUAL -1)
    # message() concatenates its arguments without a separator, so every
    # fragment carries its own trailing space or newline.
    message(WARNING
      "Your ${PATH_VARIABLE} seems to contain CUDA includes. "
      "This might result in the following error:\n"
      "'error: kernel launches from templates are not allowed in system files'\n"
      "To fix the problem, remove the CUDA includes from the path. "
      "(A better place for these includes might be your CPATH)\n"
      "source: https://code.google.com/p/thrust/issues/detail?id=359#c5 \n"
      "issue: https://github.com/ComputationalRadiationPhysics/haseongpu/issues/26 \n"
      "CPATH: https://gcc.gnu.org/onlinedocs/cpp/Environment-Variables.html \n")
  endif()
endfunction()

warn_path_cuda("CPLUS_INCLUDE_PATH")
warn_path_cuda("C_INCLUDE_PATH")
176+
177+
178+
###############################################################################
179+
# Executables
180+
###############################################################################
181+
# Name of the single executable built by this project.
set(HASEonGPU_NAME "calcPhiASE")

# The project headers are registered for the host compiler and for nvcc.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
cuda_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")

# Every file directly inside src/ whose extension starts with "c" is a source.
# The list is collected at configure time, so new files need a re-run of cmake.
file(GLOB SRCFILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.c*")

# Read by the FindCUDA module when the target is created below.
set(CUDA_SEPARABLE_COMPILATION ON)
cuda_add_executable(${HASEonGPU_NAME} ${SRCFILES})

# Link everything gathered in LIBS plus the CUDA runtime.
target_link_libraries(${HASEonGPU_NAME} ${LIBS} ${CUDA_CUDART_LIBRARY})
192+
193+
194+
###############################################################################
195+
# more targets (make examples, make new, make doc)
196+
###############################################################################
197+
# `examples` (part of the default build): copies the built binary and the
# MATLAB wrapper script into the example directories of the source tree.
# File operations go through `cmake -E` so they do not depend on a POSIX shell
# providing `mkdir -p` / `cp`; destinations are spelled out as full file names.
add_custom_target(
  examples
  ALL
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/example/matlab_example/bin
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/example/c_example/bin
  COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_SOURCE_DIR}/example/c_example/output
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${HASEonGPU_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/example/matlab_example/bin/${HASEonGPU_NAME}
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_BINARY_DIR}/${HASEonGPU_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/example/c_example/bin/${HASEonGPU_NAME}
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/src/${HASEonGPU_NAME}.m ${CMAKE_CURRENT_SOURCE_DIR}/example/matlab_example/${HASEonGPU_NAME}.m
  COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/src/${HASEonGPU_NAME}.m ${CMAKE_CURRENT_SOURCE_DIR}/${HASEonGPU_NAME}.m
  DEPENDS ${HASEonGPU_NAME}
  VERBATIM
)

# `new`: clean, then rebuild everything. The build tool is driven through
# `cmake --build` instead of a hard-coded `make` executable.
add_custom_target(
  new
  COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target clean
  COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR}
  VERBATIM
)
215+
216+
# for later...
217+
#
218+
#find_package(Doxygen)
219+
#if(DOXYGEN_FOUND)
220+
# configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Doxygen.conf ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile @ONLY)
221+
# add_custom_target(doc
222+
# ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
223+
# WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
224+
# COMMENT "Generating API documentation with Doxygen" VERBATIM
225+
# )
226+
#endif(DOXYGEN_FOUND)

LICENSE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
HASENonGPU - Licenses
1+
HASEonGPU - Licenses
22
================================================================================
33

44
**Copyright 2014** Erik Zenker, Carlchristian Eckert, Marius Melzer,
55
Daniel Albach
66

77

8-
HASENonGPU is a fast GPU-implementation of a Monte-Carlo
8+
HASEonGPU is a fast GPU-implementation of a Monte-Carlo
99
based ASE-Flux simulation
1010

1111

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ LIBS = -lpthread -lcudart -lm
1212
ARCH = -gencode=arch=compute_20,code=sm_20 -gencode=arch=compute_35,code=sm_35
1313
#NVCC_FLAGS = --use_fast_math -Xptxas="-v"
1414
#DEBUG_FLAGS = -g -G -lineinfo -D THRUST_DEBUG
15-
NVCC_FLAGS = --use_fast_math
15+
NVCC_FLAGS = --use_fast_math "-DBOOST_NOINLINE=__attribute__((noinline))"
1616
GCC_FLAGS = -std=c++0x -J 8 -O2
1717
#DEV_FLAGS = --compiler-options="-Wall -Wextra"
1818
DEV_FLAGS = --compiler-options="-Wextra -Waddress -Warray-bounds -Wchar-subscripts -Wcomment -Wformat -Wmain -Wmissing-braces -Wparentheses -Wreturn-type -Wsequence-point -Wsign-compare -Wstrict-aliasing -Wstrict-overflow=1 -Wswitch -Wtrigraphs -Wuninitialized -Wunused-function -Wunused-label -Wunused-value -Wunused-variable -Wvolatile-register-var -Werror"
@@ -52,3 +52,15 @@ clean:
5252
new:
5353
@make clean
5454
@make
55+
56+
archive:
57+
rm -f program_sources.zip
58+
zip -r program_sources.zip src
59+
zip -r program_sources.zip example
60+
zip -r program_sources.zip CMakeLists.txt
61+
zip -r program_sources.zip include
62+
zip -r program_sources.zip LICENSE.md
63+
zip -r program_sources.zip README.md
64+
zip -r program_sources.zip REFERENCE.md
65+
zip -r program_sources.zip COPYING
66+
zip -r program_sources.zip utils

0 commit comments

Comments
 (0)