Skip to content

Commit

Permalink
Enable NVTX profiling information for CUDA 10 by default (NVIDIA#1793)
Browse files Browse the repository at this point in the history
- CUDA 10 provides a header-only NVTX library, so DALI doesn't gain any runtime dependency from it. This PR enables NVTX for CUDA 10 by default.

Signed-off-by: Janusz Lisiecki <[email protected]>
  • Loading branch information
JanuszL authored Mar 11, 2020
1 parent 03b9368 commit 41efe98
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 25 deletions.
13 changes: 12 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,21 @@ endif()

project(DALI CUDA CXX C)
set(DALI_ROOT ${PROJECT_SOURCE_DIR})
set(CUDA_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")

# Build options
option(BUILD_TEST "Build googletest test suite" ON)
option(BUILD_BENCHMARK "Build benchmark suite" ON)
option(BUILD_NVTX "Build with NVTX profiling enabled" OFF)
# An empty BUILD_NVTX is treated as "not specified": remove the cache entry so
# that the option() call below can apply its default.
if("${BUILD_NVTX}" STREQUAL "")
  unset(BUILD_NVTX CACHE)
endif()
# Starting from CUDA 10.0 NVTX is enabled by default, as it doesn't add any
# linkage dependency. CUDA_VERSION is quoted so that an empty or undefined
# value compares as "older than 10.0" instead of producing a malformed if().
if("${CUDA_VERSION}" VERSION_GREATER_EQUAL "10.0")
  option(BUILD_NVTX "Build with NVTX profiling enabled" ON)
else()
  option(BUILD_NVTX "Build with NVTX profiling enabled" OFF)
endif()
option(BUILD_PYTHON "Build Python bindings" ON)
option(BUILD_LMDB "Build LMDB readers" OFF)
option(BUILD_JPEG_TURBO "Build with libjpeg-turbo support" ON)
Expand All @@ -47,6 +57,7 @@ set(BUILD_FFMPEG ${BUILD_NVDEC})
include(cmake/Utils.cmake)
include(cmake/CUDA_utils.cmake)

propagate_option(BUILD_NVTX)
propagate_option(BUILD_PYTHON)
propagate_option(BUILD_LMDB)
propagate_option(BUILD_JPEG_TURBO)
Expand Down
11 changes: 5 additions & 6 deletions cmake/Dependencies.aarch64-linux.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_HOST})
set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TARGET})

set(CUDA_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")

set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CUDA_TARGET}/lib")
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDA_TARGET}/include")

Expand Down Expand Up @@ -51,10 +49,11 @@ list(APPEND DALI_EXCLUDES libculibos.a)
include_directories(${CUDA_TOOLKIT_TARGET_DIR}/include)

# NVTX for profiling
if (BUILD_NVTX)
CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
add_definitions(-DDALI_USE_NVTX)
# The nvToolsExt library is only located and linked for CUDA toolkits older
# than 10.0; for 10.0 and newer nothing is appended to DALI_LIBS here.
# CUDA_VERSION is quoted so an empty or undefined value cannot break the if().
if (NVTX_ENABLED AND "${CUDA_VERSION}" VERSION_LESS "10.0")
  CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
  list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
endif()

##################################################################
Expand Down
11 changes: 5 additions & 6 deletions cmake/Dependencies.aarch64-qnx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ endif()
set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_HOST})
set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TARGET})

set(CUDA_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")

set(CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES "${CUDA_TARGET}/lib")
set(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES "${CUDA_TARGET}/include")

Expand Down Expand Up @@ -62,10 +60,11 @@ include_directories(${CUDA_TOOLKIT_TARGET_DIR}/include)
include_directories(${CUDA_TOOLKIT_ROOT_DIR}/include)

# NVTX for profiling
if (BUILD_NVTX)
CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
add_definitions(-DDALI_USE_NVTX)
# The nvToolsExt library is only located and linked for CUDA toolkits older
# than 10.0; for 10.0 and newer nothing is appended to DALI_LIBS here.
# CUDA_VERSION is quoted so an empty or undefined value cannot break the if().
if (NVTX_ENABLED AND "${CUDA_VERSION}" VERSION_LESS "10.0")
  CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
  list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
endif()

##################################################################
Expand Down
11 changes: 5 additions & 6 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
CUDA_find_library(CUDART_LIB cudart_static)
list(APPEND DALI_EXCLUDES libcudart_static.a)

set(CUDA_VERSION "${CMAKE_CUDA_COMPILER_VERSION}")

# For NVJPEG
if (BUILD_NVJPEG)
find_package(NVJPEG 9.0 REQUIRED)
Expand Down Expand Up @@ -60,10 +58,11 @@ list(APPEND DALI_LIBS ${CUDA_culibos_LIBRARY})
list(APPEND DALI_EXCLUDES libculibos.a)

# NVTX for profiling
if (BUILD_NVTX)
CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
add_definitions(-DDALI_USE_NVTX)
# The nvToolsExt library is only located and linked for CUDA toolkits older
# than 10.0; for 10.0 and newer nothing is appended to DALI_LIBS here.
# CUDA_VERSION is quoted so an empty or undefined value cannot break the if().
if (NVTX_ENABLED AND "${CUDA_VERSION}" VERSION_LESS "10.0")
  CUDA_find_library(CUDA_nvToolsExt_LIBRARY nvToolsExt)
  list(APPEND DALI_LIBS ${CUDA_nvToolsExt_LIBRARY})
endif()

if (VERBOSE_LOGS)
Expand Down
3 changes: 2 additions & 1 deletion docker/build_helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ export ARCH=${ARCH}
export CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
export BUILD_TEST=${BUILD_TEST:-ON}
export BUILD_BENCHMARK=${BUILD_BENCHMARK:-ON}
export BUILD_NVTX=${BUILD_NVTX:-OFF}
# no default is set here: an empty value lets CMake pick the default, which differs for CUDA 9.x and CUDA 10.x
export BUILD_NVTX=${BUILD_NVTX}
export BUILD_PYTHON=${BUILD_PYTHON:-ON}
export BUILD_LMDB=${BUILD_LMDB:-ON}
export BUILD_JPEG_TURBO=${BUILD_JPEG_TURBO:-ON}
Expand Down
18 changes: 13 additions & 5 deletions include/dali/core/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@
#ifndef DALI_CORE_COMMON_H_
#define DALI_CORE_COMMON_H_

#ifdef DALI_USE_NVTX
#include "nvToolsExt.h"
// Select the NVTX header that matches the CUDA runtime version in use.
#if NVTX_ENABLED
  // Included only to obtain the CUDART_VERSION value.
  #include <cuda_runtime_api.h>
  // An undefined macro evaluates to 0 inside #if, so the missing-version case
  // has to be tested explicitly; otherwise it would silently be treated as
  // "older than 10.0" and the #error could never trigger.
  #if !defined(CUDART_VERSION)
    #error Unknown CUDART_VERSION!
  #elif (CUDART_VERSION >= 10000)
    #include "nvtx3/nvToolsExt.h"
  #else
    #include "nvToolsExt.h"
  #endif
#endif

#include <array>
Expand Down Expand Up @@ -148,7 +156,7 @@ struct TimeRange {
static const uint32_t knvGreen = 0x76B900;

TimeRange(std::string name, const uint32_t rgb = kBlue) { // NOLINT
#ifdef DALI_USE_NVTX
#if NVTX_ENABLED
nvtxEventAttributes_t att;
att.version = NVTX_VERSION;
att.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
Expand All @@ -166,15 +174,15 @@ struct TimeRange {
~TimeRange() { stop(); }

void stop() {
#ifdef DALI_USE_NVTX
#if NVTX_ENABLED
if (started) {
started = false;
nvtxRangePop();
}
#endif
}

#ifdef DALI_USE_NVTX
#if NVTX_ENABLED

private:
bool started = false;
Expand Down

0 comments on commit 41efe98

Please sign in to comment.