Skip to content

Commit

Permalink
Add support for GPU based numpy reader (NVIDIA#2477)
Browse files Browse the repository at this point in the history
- GPU based numpy reader uses GPU Direct Storage via cufile library implementation

Authored-by: Thorsten Kurth <[email protected]>
Co-authored-by: Michał Zientkiewicz <[email protected]>
Co-authored-by: Janusz Lisiecki <[email protected]>
Signed-off-by: Janusz Lisiecki <[email protected]>
  • Loading branch information
3 people authored Nov 26, 2020
1 parent 647cf31 commit add3a20
Show file tree
Hide file tree
Showing 61 changed files with 1,755 additions and 365 deletions.
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ cmake_dependent_option(BUILD_NVOF "Build with NVIDIA OPTICAL FLOW SDK support" O
"NOT BUILD_DALI_NODEPS" OFF)
cmake_dependent_option(BUILD_NVML "Build with NVIDIA Management Library (NVML) support" ON
"NOT BUILD_DALI_NODEPS" OFF)
# Declare the BUILD_CUFILE option (default ON, forced OFF when BUILD_DALI_NODEPS is set)
# only when ARCH does not match "aarch64". ARCH is expanded inside quotes so that an
# empty or unset value still yields a well-formed if() condition instead of a parse error.
if(NOT ("${ARCH}" MATCHES "aarch64"))
  cmake_dependent_option(BUILD_CUFILE "Build with cufile (GPU Direct Storage) support" ON
                         "NOT BUILD_DALI_NODEPS" OFF)
endif()

if (BUILD_DALI_NODEPS)
set(BUILD_OPENCV OFF)
Expand Down Expand Up @@ -142,6 +146,7 @@ propagate_option(BUILD_NVJPEG2K)
propagate_option(BUILD_NVOF)
propagate_option(BUILD_NVDEC)
propagate_option(BUILD_NVML)
propagate_option(BUILD_CUFILE)
propagate_option(LINK_DRIVER)

get_dali_version(${PROJECT_SOURCE_DIR}/VERSION DALI_VERSION)
Expand Down
1 change: 1 addition & 0 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ if (NVTX_ENABLED)
endif()
endif()

# verbose
if (VERBOSE_LOGS)
add_definitions(-DDALI_VERBOSE_LOGS)
endif()
Expand Down
3 changes: 3 additions & 0 deletions cmake/libdali.map.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
DALI_@DALI_VERSION@ {
global:
*;
# cufile.h declares all of its symbols with default visibility, so our cuFile* wrappers
# would be exported as well. We don't want that, so they are hidden explicitly here.
local: cuFile*;
};
3 changes: 3 additions & 0 deletions cmake/libdali_core.map.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
DALI_CORE_@DALI_VERSION@ {
global:
*;
# cufile.h declares all of its symbols with default visibility, so our cuFile* wrappers
# would be exported as well. We don't want that, so they are hidden explicitly here.
local: cuFile*;
};
3 changes: 3 additions & 0 deletions cmake/libdali_kernels.map.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
DALI_KERNELS_@DALI_VERSION@ {
global:
*;
# cufile.h declares all of its symbols with default visibility, so our cuFile* wrappers
# would be exported as well. We don't want that, so they are hidden explicitly here.
local: cuFile*;
};
3 changes: 3 additions & 0 deletions cmake/libdali_operators.map.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
DALI_OPERATORS_@DALI_VERSION@ {
global:
*;
# cufile.h declares all of its symbols with default visibility, so our cuFile* wrappers
# would be exported as well. We don't want that, so they are hidden explicitly here.
local: cuFile*;
};
8 changes: 7 additions & 1 deletion dali/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ if (BUILD_NVML)
target_link_libraries(dali PRIVATE $<TARGET_OBJECTS:dynlink_nvml>)
endif(BUILD_NVML)

################################################
# Link the cuFile wrapper library into dali only when cuFile support is being built.
if (BUILD_CUFILE)
target_link_libraries(dali PRIVATE dynlink_cufile)
endif()

# Build test suite
################################################
if (BUILD_DALI_PIPELINE AND BUILD_TEST)
Expand All @@ -90,6 +93,9 @@ if (BUILD_DALI_PIPELINE AND BUILD_TEST)
if (BUILD_NVML)
target_link_libraries(dali_test PRIVATE $<TARGET_OBJECTS:dynlink_nvml>)
endif(BUILD_NVML)
if (BUILD_CUFILE)
target_link_libraries(dali_test PRIVATE dynlink_cufile)
endif()
target_link_libraries(dali_test PRIVATE "-pie")
set_target_properties(dali_test PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${TEST_BINARY_DIR})
set_target_properties(dali_test PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
30 changes: 29 additions & 1 deletion dali/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,36 @@ else()
add_library(dynlink_cuda STATIC dynlink_cuda.cc)
endif()

list(REMOVE_ITEM DALI_CORE_SRCS dynlink_cuda.cc)
list(FILTER DALI_CORE_SRCS EXCLUDE REGEX ".*dynlink_cuda.cc")

# Build the dynlink_cufile static library: a hand-written loader (dynlink_cufile.cc)
# plus a stub source generated from cufile.h at build time.
if (BUILD_CUFILE)
# The generated stub lives in the build tree, not in the source tree.
set(CUFILE_GENERATED_STUB "${CMAKE_CURRENT_BINARY_DIR}/dynlink_cufile_gen.cc")
# Run stub_codegen.py over cufile.h, driven by cufile.json. The rule re-runs whenever the
# generator script, the header or the JSON description changes (see DEPENDS).
# NOTE(review): the interpreter is invoked as plain `python` from PATH - confirm this is the
# interpreter the rest of the build expects.
add_custom_command(
OUTPUT ${CUFILE_GENERATED_STUB}
COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/../../tools/stub_generator/stub_codegen.py --unique_prefix=Cufile --
"${CMAKE_CURRENT_SOURCE_DIR}/../../tools/stub_generator/cufile.json" ${CUFILE_GENERATED_STUB}
"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/cufile.h" "-I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
# for some reason QNX fails with 'too many errors emitted' if this is not set
"-ferror-limit=0"
${DEFAULT_COMPILER_INCLUDE}
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/../../tools/stub_generator/stub_codegen.py
"${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}/cufile.h"
"${CMAKE_CURRENT_SOURCE_DIR}/../../tools/stub_generator/cufile.json"
COMMENT "Running cufile.h stub generator"
VERBATIM)

# Mark the stub as generated so it is not required to exist at configure time.
set_source_files_properties(${CUFILE_GENERATED_STUB} PROPERTIES GENERATED TRUE)
add_library(dynlink_cufile STATIC dynlink_cufile.cc ${CUFILE_GENERATED_STUB})
endif()

list(FILTER DALI_CORE_SRCS EXCLUDE REGEX ".*dynlink_cufile.cc")

# dali_core library: built from DALI_CORE_SRCS, with the CUDA toolkit include directories
# exposed to consumers (PUBLIC).
add_library(dali_core ${LIBTYPE} ${DALI_CORE_SRCS})
target_include_directories(dali_core PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_link_libraries(dali_core PRIVATE dynlink_cuda ${CUDART_LIB})
# Link the cuFile wrapper library only when cuFile support is being built.
if (BUILD_CUFILE)
target_link_libraries(dali_core PRIVATE dynlink_cufile)
endif()
target_link_libraries(dali_core PUBLIC ${DALI_SYSTEM_LIBS})
# Pass --exclude-libs to the linker for the libraries listed in exclude_libs.
target_link_libraries(dali_core PRIVATE "-Wl,--exclude-libs,${exclude_libs}")
set_target_properties(dali_core PROPERTIES
Expand All @@ -67,6 +92,9 @@ if (BUILD_TEST)
add_executable(dali_core_test "${DALI_CORE_TEST_SRCS}")
target_link_libraries(dali_core_test PUBLIC dali_core)
target_link_libraries(dali_core_test PRIVATE gtest dynlink_cuda ${DALI_LIBS})
if (BUILD_CUFILE)
target_link_libraries(dali_core_test PRIVATE dynlink_cufile)
endif()
target_link_libraries(dali_core_test PRIVATE "-Wl,--exclude-libs,${exclude_libs}")
target_link_libraries(dali_core_test PRIVATE "-pie")
set_target_properties(dali_core_test PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
24 changes: 6 additions & 18 deletions dali/core/dynlink_cuda.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@

namespace {

typedef void *CUDADRIVER;
typedef void* CUDADRIVER;

static char __CudaLibName[] = "libcuda.so";
static char __CudaLibName1[] = "libcuda.so.1";
static const char __CudaLibName[] = "libcuda.so";
static const char __CudaLibName1[] = "libcuda.so.1";

CUDADRIVER loadCudaLibrary() {
CUDADRIVER ret = nullptr;
Expand Down Expand Up @@ -54,24 +54,12 @@ typedef void *tLoadSymbol(const char *name);
void CudaSetSymbolLoader(tLoadSymbol loader_func);

bool cuInitChecked() {
static std::mutex m;
static bool initialized = false;

if (initialized)
return true;

std::lock_guard<std::mutex> lock(m);

if (initialized)
return true;

// set symbol loader for this library
#if !LINK_DRIVER_ENABLED
CudaSetSymbolLoader(LoadSymbol);
static std::once_flag cuda_once;
std::call_once(cuda_once, CudaSetSymbolLoader, LoadSymbol);
#endif
static CUresult res = cuInit(0);
initialized = (res == CUDA_SUCCESS);
return initialized;
return res == CUDA_SUCCESS;
}

bool cuIsSymbolAvailable(const char *name) {
Expand Down
61 changes: 61 additions & 0 deletions dali/core/dynlink_cufile.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <dlfcn.h>
#include <stdio.h>
#include <mutex>
#include <string>
#include <unordered_map>
#include "dali/core/dynlink_cufile.h"

namespace {

typedef void* CUFILE;

static const char __CufileLibName[] = "libcufile.so";
static const char __CufileLibName1[] = "libcufile.so.1";

// Opens the cuFile shared library with dlopen(RTLD_NOW), trying the versioned name
// ("libcufile.so.1") first and the unversioned name ("libcufile.so") as a fallback.
//
// Returns the library handle, or nullptr when neither name could be opened. On failure a
// diagnostic - including the reason reported by dlerror(), when available - is written to
// stderr so that it does not get mixed into the program's regular output.
CUFILE loadCufileLibrary() {
  CUFILE ret = dlopen(__CufileLibName1, RTLD_NOW);
  if (!ret) {
    ret = dlopen(__CufileLibName, RTLD_NOW);
  }

  if (!ret) {
    // dlerror() describes the most recent dl* failure, i.e. the fallback attempt above.
    const char *reason = dlerror();
    if (reason) {
      fprintf(stderr, "dlopen \"%s\" failed! (%s)\n", __CufileLibName, reason);
    } else {
      fprintf(stderr, "dlopen \"%s\" failed!\n", __CufileLibName);
    }
  }
  return ret;
}

void *LoadSymbol(const char *name) {
static CUFILE cufileDrvLib = loadCufileLibrary();
void *ret = cufileDrvLib ? dlsym(cufileDrvLib, name) : nullptr;
return ret;
}

} // namespace

// it is defined in the generated file
typedef void *tLoadSymbol(const char *name);
void CufileSetSymbolLoader(tLoadSymbol loader_func);

// Registers this file's LoadSymbol with the generated cuFile stub through
// CufileSetSymbolLoader. The registration runs at most once (std::call_once), so the
// function may be called repeatedly. When CUFILE_ENABLED is undefined or 0 the body is
// compiled out and the call does nothing.
void cufileInit() {
#if CUFILE_ENABLED
  static std::once_flag loader_registered;
  std::call_once(loader_registered, [] { CufileSetSymbolLoader(LoadSymbol); });
#endif
}
6 changes: 6 additions & 0 deletions dali/operators/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ set_target_properties(dali_operators PROPERTIES
LIBRARY_OUTPUT_DIRECTORY "${DALI_LIBRARY_OUTPUT_DIR}")
target_link_libraries(dali_operators PUBLIC dali dali_kernels dali_core)
target_link_libraries(dali_operators PRIVATE dynlink_cuda ${DALI_LIBS})
# Link the cuFile wrapper library into dali_operators only when cuFile support is being built.
if (BUILD_CUFILE)
target_link_libraries(dali_operators PRIVATE dynlink_cufile)
endif()
# Exclude (most) statically linked dali dependencies from the exports of libdali_operators.so
target_link_libraries(dali_operators PRIVATE "-Wl,--exclude-libs,${exclude_libs}")
# Options for using Dockerfile FFmpeg version
Expand All @@ -78,6 +81,9 @@ if (BUILD_TEST)
if (BUILD_NVML)
target_link_libraries(dali_operator_test PRIVATE $<TARGET_OBJECTS:dynlink_nvml>)
endif(BUILD_NVML)
if (BUILD_CUFILE)
target_link_libraries(dali_operator_test PRIVATE dynlink_cufile)
endif()
target_link_libraries(dali_operator_test PRIVATE "-Wl,--exclude-libs,${exclude_libs}")
target_link_libraries(dali_operator_test PRIVATE "-pie")
set_target_properties(dali_operator_test PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
7 changes: 6 additions & 1 deletion dali/operators/reader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ collect_headers(DALI_INST_HDRS PARENT_SCOPE) # TODO (ONLY SUPPORTED ONES)

list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/file_reader_op.cc")
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/numpy_reader_op.cc")

# The GPU numpy reader operator is compiled only when cuFile support is being built.
if (BUILD_CUFILE)
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/numpy_reader_gpu_op.cc")
endif()

list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/sequence_reader_op.cc")

if(BUILD_NVDEC)
if (BUILD_NVDEC)
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_op.cc")
list(APPEND DALI_OPERATOR_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/video_reader_resize_op.cc")
endif()
Expand Down
13 changes: 12 additions & 1 deletion dali/operators/reader/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,20 @@ collect_headers(DALI_INST_HDRS PARENT_SCOPE)

set(DALI_OPERATOR_SRCS ${DALI_OPERATOR_SRCS}
"${CMAKE_CURRENT_SOURCE_DIR}/filesystem.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/file_loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/file_label_loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/coco_loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/sequence_loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/numpy_loader.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/utils.cc")


# cuFile-backed loaders are compiled only when cuFile support is being built.
if (BUILD_CUFILE)
  list(APPEND DALI_OPERATOR_SRCS
       "${CMAKE_CURRENT_SOURCE_DIR}/cufile_loader.cc"
       "${CMAKE_CURRENT_SOURCE_DIR}/numpy_loader_gpu.cc")
endif()

if (BUILD_LIBSND)
set(DALI_OPERATOR_SRCS ${DALI_OPERATOR_SRCS}
"${CMAKE_CURRENT_SOURCE_DIR}/nemo_asr_loader.cc")
Expand All @@ -42,6 +48,11 @@ set(DALI_OPERATOR_TEST_SRCS ${DALI_OPERATOR_TEST_SRCS}
"${CMAKE_CURRENT_SOURCE_DIR}/sequence_loader_test.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/numpy_loader_test.cc")

# Extra test sources used only when cuFile support is being built.
# NOTE(review): this appends the loader implementation (cufile_loader.cc), not a *_test.cc
# file, and the same source is also part of DALI_OPERATOR_SRCS - confirm this is intended.
if (BUILD_CUFILE)
  list(APPEND DALI_OPERATOR_TEST_SRCS
       "${CMAKE_CURRENT_SOURCE_DIR}/cufile_loader.cc")
endif()

if (BUILD_LIBSND)
set(DALI_OPERATOR_TEST_SRCS ${DALI_OPERATOR_TEST_SRCS}
"${CMAKE_CURRENT_SOURCE_DIR}/nemo_asr_loader_test.cc")
Expand Down
57 changes: 57 additions & 0 deletions dali/operators/reader/loader/cufile_loader.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <dirent.h>
#include <errno.h>
#include <glob.h>
#include <memory>

#include "dali/core/common.h"
#include "dali/operators/reader/loader/cufile_loader.h"
#include "dali/operators/reader/loader/file_loader.h"
#include "dali/operators/reader/loader/utils.h"
#include "dali/util/cufile.h"
#include "dali/util/cufile_helper.h"

namespace dali {

// Constructs the loader on top of the FileLoader base (initialised from `spec` only) and
// obtains the cuFile driver handle that is shared between loader instances.
// NOTE(review): `images` and `shuffle_after_epoch` are neither used in this body nor
// forwarded to the base class - confirm this is intended.
CUFileLoader::CUFileLoader(const OpSpec& spec, vector<std::string> images, bool shuffle_after_epoch)
    : FileLoader<GPUBackend, ImageFileWrapperGPU, CUFileStream>(spec) {
  // set the device first
  DeviceGuard device_guard(device_id_);

  // Driver singleton state. Only a weak reference is stored here, so the handle is owned
  // by the loaders that hold `d_`; the mutex serialises lookup and creation.
  static std::mutex driver_mutex;
  static std::weak_ptr<cufile::CUFileDriverHandle> shared_driver;

  std::lock_guard<std::mutex> guard(driver_mutex);
  // Reuse the existing driver handle if one is still alive, otherwise create a new one
  // and publish it for subsequent loaders.
  d_ = shared_driver.lock();
  if (!d_) {
    d_ = std::make_shared<cufile::CUFileDriverHandle>(device_id_);
    shared_driver = d_;
  }
}

// Resets `image_file` to an empty state: prepares its tensor through PrepareEmptyTensor
// and clears the stored file name.
void CUFileLoader::PrepareEmpty(ImageFileWrapperGPU& image_file) {
PrepareEmptyTensor(image_file.image);
image_file.filename.clear();
}

// Reads the next sample into `imfile` by delegating to the FileLoader base implementation,
// after setting up a DeviceGuard for this loader's device.
void CUFileLoader::ReadSample(ImageFileWrapperGPU& imfile) {
  using Base = FileLoader<GPUBackend, ImageFileWrapperGPU, CUFileStream>;

  // set the device first
  DeviceGuard device_guard(device_id_);
  Base::ReadSample(imfile);
}

} // namespace dali
Loading

0 comments on commit add3a20

Please sign in to comment.