-
Notifications
You must be signed in to change notification settings - Fork 943
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[REVIEW] Jitify versions of binaryops for non-homogeneous types #892
Changes from 51 commits
0421c02
d66bfda
c23fda5
f84a330
ca64bde
061d4e5
c9791b1
590bb5c
9017317
df64885
8fd5081
beec867
7fe29b8
8e54098
ada453e
88c57c1
b6650ce
f9dea20
48302f7
d553db0
4413a78
ca9e43e
a55f111
b3673fc
52892b6
96f83e7
fea6751
bf4dc61
358e0c0
877ac61
ca0b31f
b269ee4
322e4a5
35c2c46
d93fa0b
df641c4
aaae814
02e2a9e
e0c8fab
be31178
c5cbd8b
b5c729b
febbad6
d0e30b2
dde39a7
86ea59b
11b67dc
9520853
be21879
3946e1c
78a3cc8
99a943f
500e188
ec1116b
c0243e6
9c729c7
4626f7d
b5d1f78
7fe729a
79d5111
8c11073
87e0b41
0d797fe
09ffe36
2bfa21e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -141,6 +141,7 @@ include_directories("${ARROW_INCLUDE_DIR}" | |
"${CMAKE_SOURCE_DIR}/include" | ||
"${CMAKE_SOURCE_DIR}/src" | ||
"${CMAKE_SOURCE_DIR}/thirdparty/cub" | ||
"${CMAKE_SOURCE_DIR}/thirdparty/jitify" | ||
"${CMAKE_SOURCE_DIR}/thirdparty/moderngpu/src" | ||
"${CMAKE_SOURCE_DIR}/thirdparty/rmm/include" | ||
"${ZLIB_INCLUDE_DIRS}") | ||
|
@@ -189,6 +190,13 @@ add_library(cudf SHARED | |
src/groupby/groupby.cu | ||
src/groupby/new_groupby.cu | ||
src/binary/binary_ops.cu | ||
src/binary/jit/code/kernel.cpp | ||
mtjrider marked this conversation as resolved.
Show resolved
Hide resolved
|
||
src/binary/jit/code/operation.cpp | ||
src/binary/jit/code/traits.cpp | ||
src/binary/jit/core/binop.cpp | ||
src/binary/jit/core/launcher.cpp | ||
src/binary/jit/util/operator.cpp | ||
src/binary/jit/util/type.cpp | ||
src/bitmask/bitmask_ops.cu | ||
src/bitmask/valid_ops.cu | ||
src/compaction/stream_compaction_ops.cu | ||
|
@@ -213,6 +221,24 @@ add_library(cudf SHARED | |
#Override RPATH for cudf | ||
SET_TARGET_PROPERTIES(cudf PROPERTIES BUILD_RPATH "\$ORIGIN") | ||
|
||
################################################################################################### | ||
# - jitify ---------------------------------------------------------------------------------------- | ||
|
||
add_executable(stringify "${CMAKE_SOURCE_DIR}/thirdparty/jitify/stringify.cpp") | ||
execute_process(WORKING_DIRECTORY ${CMAKE_BINARY_DIR} | ||
COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_BINARY_DIR}/include) | ||
|
||
add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/include/types.h.jit | ||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include | ||
COMMAND ${CMAKE_BINARY_DIR}/stringify cudf/types.h > ${CMAKE_BINARY_DIR}/include/types.h.jit | ||
COMMENT "stringify header types.h" | ||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
DEPENDS stringify | ||
MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/include/cudf/types.h) | ||
|
||
add_custom_target(stringify_run DEPENDS ${CMAKE_BINARY_DIR}/include/types.h.jit) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
For example:
As long as the Note that this is helpful if later you expect your command to have multiple outputs. I would also consider renaming There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried this but it didn't work. Running add_custom_command(OUTPUT STRINGIFIED_HEADERS
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include
COMMAND ${CMAKE_BINARY_DIR}/stringify cudf/types.h > ${CMAKE_BINARY_DIR}/include/types.h.jit
COMMENT "Run stringify on header types.h to convert it to c-str for use in JIT compiled code"
DEPENDS stringify
MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/include/cudf/types.h)
add_custom_target(stringified_headers DEPENDS STRINGIFIED_HEADERS)
add_dependencies(cudf stringified_headers)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry what does the cmake code above have to do with launcher.cpp? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This code is for making and running an executable that comes with Jitify. This executable, called stringify, converts source files into c-strings and writes them to another source file. Now those stringified source files can be used in JIT code compilation. In our specific use case, we wanted to be able to use the definitions in |
||
|
||
add_dependencies(cudf stringify_run) | ||
|
||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
################################################################################################### | ||
# - build options --------------------------------------------------------------------------------- | ||
|
||
|
@@ -230,11 +256,18 @@ if(HT_LEGACY_ALLOCATOR) | |
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --define-macro HT_LEGACY_ALLOCATOR") | ||
endif(HT_LEGACY_ALLOCATOR) | ||
|
||
option(JITIFY_THREAD_SAFE "Use a global cache for JIT compiled kernels" ON) | ||
if(JITIFY_THREAD_SAFE) | ||
kkraus14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
message(STATUS "Using global cache for JIT compiled kernels") | ||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --define-macro JITIFY_THREAD_SAFE") | ||
endif(JITIFY_THREAD_SAFE) | ||
|
||
|
||
################################################################################################### | ||
# - link libraries -------------------------------------------------------------------------------- | ||
|
||
target_link_libraries(cudf rmm "${ARROW_LIB}" ${ZLIB_LIBRARIES} NVStrings) | ||
# TODO: better nvrtc linking with optional variables | ||
target_link_libraries(cudf rmm "${ARROW_LIB}" ${ZLIB_LIBRARIES} NVStrings nvrtc) | ||
|
||
################################################################################################### | ||
# - python cffi bindings -------------------------------------------------------------------------- | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,6 +85,35 @@ typedef struct { | |
// here we can also hold info for decimal datatype or any other datatype that requires additional information | ||
} gdf_dtype_extra_info; | ||
|
||
/**---------------------------------------------------------------------------* | ||
* @union gdf_data | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn't this be
|
||
* @brief Union used for scalar type. | ||
* It stores a unique value for scalar type. | ||
* It has a direct relationship with the gdf_dtype. | ||
*---------------------------------------------------------------------------**/ | ||
typedef union { | ||
int8_t si08; /**< GDF_INT8 */ | ||
int16_t si16; /**< GDF_INT16 */ | ||
int32_t si32; /**< GDF_INT32 */ | ||
int64_t si64; /**< GDF_INT64 */ | ||
float fp32; /**< GDF_FLOAT32 */ | ||
double fp64; /**< GDF_FLOAT64 */ | ||
int32_t dt32; /**< GDF_DATE32 */ | ||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
devavret marked this conversation as resolved.
Show resolved
Hide resolved
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int64_t dt64; /**< GDF_DATE64 */ | ||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int64_t tmst; /**< GDF_TIMESTAMP */ | ||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} gdf_data; | ||
|
||
/**---------------------------------------------------------------------------* | ||
* @brief A struct to hold a scalar (single) value and its type information | ||
* | ||
*---------------------------------------------------------------------------**/ | ||
typedef struct { | ||
gdf_data data; /**< Pointer to the scalar data */ | ||
devavret marked this conversation as resolved.
Show resolved
Hide resolved
|
||
gdf_dtype dtype; /**< The datatype of the scalar's data */ | ||
bool is_valid; /**< False if the value is null */ | ||
} gdf_scalar; | ||
|
||
|
||
typedef struct gdf_column_{ | ||
void *data; /**< Pointer to the columns data */ | ||
gdf_valid_type *valid; /**< Pointer to the columns validity bit mask where the 'i'th bit indicates if the 'i'th row is NULL */ | ||
|
@@ -156,6 +185,29 @@ typedef enum { | |
GDF_NUM_COLORS, /** Add new colors above this line */ | ||
} gdf_color; | ||
|
||
|
||
/**---------------------------------------------------------------------------* | ||
* @brief Types of binary operations that can be performed on data. | ||
* | ||
*---------------------------------------------------------------------------**/ | ||
typedef enum { | ||
GDF_ADD, /**< operator + */ | ||
GDF_SUB, /**< operator - */ | ||
GDF_MUL, /**< operator * */ | ||
GDF_DIV, /**< operator / using common type of lhs and rhs */ | ||
GDF_TRUE_DIV, /**< operator / after promoting type to floating point*/ | ||
kkraus14 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
GDF_FLOOR_DIV, /**< operator / after promoting to float and then flooring the result */ | ||
GDF_MOD, /**< operator % */ | ||
GDF_POW, /**< lhs ^ rhs */ | ||
GDF_EQUAL, /**< operator == */ | ||
GDF_NOT_EQUAL, /**< operator != */ | ||
GDF_LESS, /**< operator < */ | ||
GDF_GREATER, /**< operator > */ | ||
GDF_LESS_EQUAL, /**< operator <= */ | ||
GDF_GREATER_EQUAL, /**< operator >= */ | ||
} gdf_binary_operator; | ||
|
||
|
||
/* --------------------------------------------------------------------------*/ | ||
/** | ||
* @brief This struct holds various information about how an operation should be | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
/* | ||
* Copyright (c) 2019, NVIDIA CORPORATION. | ||
* | ||
* Copyright 2018-2019 BlazingDB, Inc. | ||
* Copyright 2018 Christian Noboa Mardini <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#ifndef GDF_BINARY_OPERATION_JIT_CODE_CODE_H | ||
#define GDF_BINARY_OPERATION_JIT_CODE_CODE_H | ||
|
||
namespace cudf { | ||
namespace binops { | ||
namespace jit { | ||
namespace code { | ||
|
||
extern const char* kernel; | ||
extern const char* traits; | ||
extern const char* operation; | ||
|
||
} | ||
} | ||
} | ||
} | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Copyright (c) 2019, NVIDIA CORPORATION. | ||
* | ||
* Copyright 2018-2019 BlazingDB, Inc. | ||
* Copyright 2018 Christian Noboa Mardini <[email protected]> | ||
* Copyright 2018 Rommel Quintanilla <[email protected]> | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
namespace cudf { | ||
namespace binops { | ||
namespace jit { | ||
namespace code { | ||
|
||
const char* kernel = | ||
R"***( | ||
#include "operation.h" | ||
#include "cudf/types.h" | ||
|
||
template <typename TypeOut, typename TypeLhs, typename TypeRhs, typename TypeOpe> | ||
__global__ | ||
void kernel_v_s(gdf_size_type size, | ||
TypeOut* out_data, TypeLhs* lhs_data, gdf_data rhs_data) { | ||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (gdf_size_type i=start; i<size; i+=step) { | ||
out_data[i] = TypeOpe::template operate<TypeOut, TypeLhs, TypeRhs>(lhs_data[i], *reinterpret_cast<TypeRhs*>(&rhs_data)); | ||
} | ||
} | ||
|
||
template <typename TypeOut, typename TypeLhs, typename TypeRhs, typename TypeOpe> | ||
__global__ | ||
void kernel_v_v(gdf_size_type size, | ||
TypeOut* out_data, TypeLhs* lhs_data, TypeRhs* rhs_data) { | ||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (gdf_size_type i=start; i<size; i+=step) { | ||
out_data[i] = TypeOpe::template operate<TypeOut, TypeLhs, TypeRhs>(lhs_data[i], rhs_data[i]); | ||
} | ||
} | ||
)***"; | ||
|
||
} // namespace code | ||
} // namespace jit | ||
} // namespace binops | ||
} // namespace cudf |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we pin this to a specific branch / tag / commit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, please pin to a commit to avoid build issues in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will this do? Or do I need to specify it in
.gitmodules
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to specify it in
.gitmodules
otherwise agit submodule update --init --recursive --remote
will update it to the latest commit of master.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can't seem to find a way to pin this to a commit/tag in the
.gitmodules
file. I read this andupdate
seems to be the only option. But details on update say that the only way to do this is to usenone
. Is that what I should do?